use crate::DType;
use numr::error::Result;
use numr::runtime::Runtime;
use numr::sparse::CsrData;
use numr::tensor::Tensor;
use crate::pde::types::{Grid2D, Grid3D};
pub fn assemble_neg_laplacian_2d_dirichlet<R>(
grid: &Grid2D,
boundary_values: &[f64],
rhs_data: &mut [f64],
device: &R::Device,
) -> Result<CsrData<R>>
where
R: Runtime<DType = DType>,
{
let nx = grid.nx;
let ny = grid.ny;
let n = nx * ny;
let dx2 = grid.dx * grid.dx;
let dy2 = grid.dy * grid.dy;
let is_boundary =
|i: usize, j: usize| -> bool { i == 0 || i == nx - 1 || j == 0 || j == ny - 1 };
let bval = |idx: usize| -> f64 {
if idx < boundary_values.len() {
boundary_values[idx]
} else {
0.0
}
};
let mut rows = Vec::with_capacity(5 * n);
let mut cols = Vec::with_capacity(5 * n);
let mut vals: Vec<f64> = Vec::with_capacity(5 * n);
for i in 0..nx {
for j in 0..ny {
let idx = i * ny + j;
if is_boundary(i, j) {
rows.push(idx as i64);
cols.push(idx as i64);
vals.push(1.0);
rhs_data[idx] = bval(idx);
} else {
let mut center = 0.0;
let left_idx = (i - 1) * ny + j;
if is_boundary(i - 1, j) {
rhs_data[idx] += bval(left_idx) / dx2;
} else {
rows.push(idx as i64);
cols.push(left_idx as i64);
vals.push(-1.0 / dx2);
}
center += 1.0 / dx2;
let right_idx = (i + 1) * ny + j;
if is_boundary(i + 1, j) {
rhs_data[idx] += bval(right_idx) / dx2;
} else {
rows.push(idx as i64);
cols.push(right_idx as i64);
vals.push(-1.0 / dx2);
}
center += 1.0 / dx2;
let bottom_idx = i * ny + (j - 1);
if is_boundary(i, j - 1) {
rhs_data[idx] += bval(bottom_idx) / dy2;
} else {
rows.push(idx as i64);
cols.push(bottom_idx as i64);
vals.push(-1.0 / dy2);
}
center += 1.0 / dy2;
let top_idx = i * ny + (j + 1);
if is_boundary(i, j + 1) {
rhs_data[idx] += bval(top_idx) / dy2;
} else {
rows.push(idx as i64);
cols.push(top_idx as i64);
vals.push(-1.0 / dy2);
}
center += 1.0 / dy2;
rows.push(idx as i64);
cols.push(idx as i64);
vals.push(center);
}
}
}
let nnz = rows.len();
let row_t = Tensor::<R>::from_slice(&rows, &[nnz], device);
let col_t = Tensor::<R>::from_slice(&cols, &[nnz], device);
let val_t = Tensor::<R>::from_slice(&vals, &[nnz], device);
coo_to_csr_sorted::<R>(&row_t, &col_t, &val_t, [n, n], device)
}
pub fn assemble_laplacian_2d<R>(grid: &Grid2D, device: &R::Device) -> Result<CsrData<R>>
where
R: Runtime<DType = DType>,
{
let nx = grid.nx;
let ny = grid.ny;
let n = nx * ny;
let dx2 = grid.dx * grid.dx;
let dy2 = grid.dy * grid.dy;
let mut rows = Vec::with_capacity(5 * n);
let mut cols = Vec::with_capacity(5 * n);
let mut vals: Vec<f64> = Vec::with_capacity(5 * n);
for i in 0..nx {
for j in 0..ny {
let idx = i * ny + j;
let mut center = 0.0;
if i > 0 {
rows.push(idx as i64);
cols.push((idx - ny) as i64);
vals.push(1.0 / dx2);
center -= 1.0 / dx2;
}
if i < nx - 1 {
rows.push(idx as i64);
cols.push((idx + ny) as i64);
vals.push(1.0 / dx2);
center -= 1.0 / dx2;
}
if j > 0 {
rows.push(idx as i64);
cols.push((idx - 1) as i64);
vals.push(1.0 / dy2);
center -= 1.0 / dy2;
}
if j < ny - 1 {
rows.push(idx as i64);
cols.push((idx + 1) as i64);
vals.push(1.0 / dy2);
center -= 1.0 / dy2;
}
rows.push(idx as i64);
cols.push(idx as i64);
vals.push(center);
}
}
let nnz = rows.len();
let row_t = Tensor::<R>::from_slice(&rows, &[nnz], device);
let col_t = Tensor::<R>::from_slice(&cols, &[nnz], device);
let val_t = Tensor::<R>::from_slice(&vals, &[nnz], device);
coo_to_csr_sorted::<R>(&row_t, &col_t, &val_t, [n, n], device)
}
pub fn assemble_laplacian_3d<R>(grid: &Grid3D, device: &R::Device) -> Result<CsrData<R>>
where
R: Runtime<DType = DType>,
{
let nx = grid.nx;
let ny = grid.ny;
let nz = grid.nz;
let n = nx * ny * nz;
let nyz = ny * nz;
let dx2 = grid.dx * grid.dx;
let dy2 = grid.dy * grid.dy;
let dz2 = grid.dz * grid.dz;
let mut rows = Vec::with_capacity(7 * n);
let mut cols = Vec::with_capacity(7 * n);
let mut vals: Vec<f64> = Vec::with_capacity(7 * n);
for i in 0..nx {
for j in 0..ny {
for k in 0..nz {
let idx = i * nyz + j * nz + k;
let mut center = 0.0;
if i > 0 {
rows.push(idx as i64);
cols.push((idx - nyz) as i64);
vals.push(1.0 / dx2);
center -= 1.0 / dx2;
}
if i < nx - 1 {
rows.push(idx as i64);
cols.push((idx + nyz) as i64);
vals.push(1.0 / dx2);
center -= 1.0 / dx2;
}
if j > 0 {
rows.push(idx as i64);
cols.push((idx - nz) as i64);
vals.push(1.0 / dy2);
center -= 1.0 / dy2;
}
if j < ny - 1 {
rows.push(idx as i64);
cols.push((idx + nz) as i64);
vals.push(1.0 / dy2);
center -= 1.0 / dy2;
}
if k > 0 {
rows.push(idx as i64);
cols.push((idx - 1) as i64);
vals.push(1.0 / dz2);
center -= 1.0 / dz2;
}
if k < nz - 1 {
rows.push(idx as i64);
cols.push((idx + 1) as i64);
vals.push(1.0 / dz2);
center -= 1.0 / dz2;
}
rows.push(idx as i64);
cols.push(idx as i64);
vals.push(center);
}
}
}
let nnz = rows.len();
let row_t = Tensor::<R>::from_slice(&rows, &[nnz], device);
let col_t = Tensor::<R>::from_slice(&cols, &[nnz], device);
let val_t = Tensor::<R>::from_slice(&vals, &[nnz], device);
coo_to_csr_sorted::<R>(&row_t, &col_t, &val_t, [n, n], device)
}
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum SideBc {
Dirichlet,
Neumann(f64),
Periodic,
}
pub fn assemble_neg_laplacian_2d_mixed<R>(
grid: &Grid2D,
sides: &[SideBc; 4],
dirichlet_values: &[f64],
rhs_data: &mut [f64],
device: &R::Device,
) -> Result<CsrData<R>>
where
R: Runtime<DType = DType>,
{
let nx = grid.nx;
let ny = grid.ny;
let n = nx * ny;
let dx = grid.dx;
let dy = grid.dy;
let dx2 = dx * dx;
let dy2 = dy * dy;
let [s_left, s_right, s_bottom, s_top] = *sides;
let is_dir = |i: usize, j: usize| -> bool {
(i == 0 && s_left == SideBc::Dirichlet)
|| (i == nx - 1 && s_right == SideBc::Dirichlet)
|| (j == 0 && s_bottom == SideBc::Dirichlet)
|| (j == ny - 1 && s_top == SideBc::Dirichlet)
};
let dval = |idx: usize| -> f64 { dirichlet_values.get(idx).copied().unwrap_or(0.0) };
let mut rows: Vec<i64> = Vec::with_capacity(5 * n);
let mut cols: Vec<i64> = Vec::with_capacity(5 * n);
let mut vals: Vec<f64> = Vec::with_capacity(5 * n);
#[allow(clippy::too_many_arguments)]
fn add_neighbour(
ni: usize,
nj: usize,
ny: usize,
inv_h2: f64,
idx: usize,
is_dir: &impl Fn(usize, usize) -> bool,
dval: &impl Fn(usize) -> f64,
rows: &mut Vec<i64>,
cols: &mut Vec<i64>,
vals: &mut Vec<f64>,
rhs_data: &mut [f64],
center: &mut f64,
) {
let nb = ni * ny + nj;
if is_dir(ni, nj) {
rhs_data[idx] += dval(nb) * inv_h2;
} else {
rows.push(idx as i64);
cols.push(nb as i64);
vals.push(-inv_h2);
}
*center += inv_h2;
}
let inv_dx2 = 1.0 / dx2;
let inv_dy2 = 1.0 / dy2;
for i in 0..nx {
for j in 0..ny {
let idx = i * ny + j;
if is_dir(i, j) {
rows.push(idx as i64);
cols.push(idx as i64);
vals.push(1.0);
rhs_data[idx] = dval(idx);
continue;
}
let on_left = i == 0;
let on_right = i == nx - 1;
let on_bottom = j == 0;
let on_top = j == ny - 1;
let mut center = 0.0f64;
if on_left {
if let SideBc::Neumann(g) = s_left {
center += 2.0 / dx2;
let nb = (i + 1) * ny + j;
if is_dir(i + 1, j) {
rhs_data[idx] += (2.0 / dx2) * dval(nb);
} else {
rows.push(idx as i64);
cols.push(nb as i64);
vals.push(-2.0 / dx2);
}
rhs_data[idx] += 2.0 * g / dx;
} else {
add_neighbour(
nx - 1,
j,
ny,
inv_dx2,
idx,
&is_dir,
&dval,
&mut rows,
&mut cols,
&mut vals,
rhs_data,
&mut center,
);
add_neighbour(
i + 1,
j,
ny,
inv_dx2,
idx,
&is_dir,
&dval,
&mut rows,
&mut cols,
&mut vals,
rhs_data,
&mut center,
);
}
} else if on_right {
if let SideBc::Neumann(g) = s_right {
center += 2.0 / dx2;
let nb = (i - 1) * ny + j;
if is_dir(i - 1, j) {
rhs_data[idx] += (2.0 / dx2) * dval(nb);
} else {
rows.push(idx as i64);
cols.push(nb as i64);
vals.push(-2.0 / dx2);
}
rhs_data[idx] += 2.0 * g / dx;
} else {
add_neighbour(
i - 1,
j,
ny,
inv_dx2,
idx,
&is_dir,
&dval,
&mut rows,
&mut cols,
&mut vals,
rhs_data,
&mut center,
);
add_neighbour(
0,
j,
ny,
inv_dx2,
idx,
&is_dir,
&dval,
&mut rows,
&mut cols,
&mut vals,
rhs_data,
&mut center,
);
}
} else {
add_neighbour(
i - 1,
j,
ny,
inv_dx2,
idx,
&is_dir,
&dval,
&mut rows,
&mut cols,
&mut vals,
rhs_data,
&mut center,
);
add_neighbour(
i + 1,
j,
ny,
inv_dx2,
idx,
&is_dir,
&dval,
&mut rows,
&mut cols,
&mut vals,
rhs_data,
&mut center,
);
}
if on_bottom {
if let SideBc::Neumann(g) = s_bottom {
center += 2.0 / dy2;
let nb = i * ny + (j + 1);
if is_dir(i, j + 1) {
rhs_data[idx] += (2.0 / dy2) * dval(nb);
} else {
rows.push(idx as i64);
cols.push(nb as i64);
vals.push(-2.0 / dy2);
}
rhs_data[idx] += 2.0 * g / dy;
} else {
add_neighbour(
i,
ny - 1,
ny,
inv_dy2,
idx,
&is_dir,
&dval,
&mut rows,
&mut cols,
&mut vals,
rhs_data,
&mut center,
);
add_neighbour(
i,
j + 1,
ny,
inv_dy2,
idx,
&is_dir,
&dval,
&mut rows,
&mut cols,
&mut vals,
rhs_data,
&mut center,
);
}
} else if on_top {
if let SideBc::Neumann(g) = s_top {
center += 2.0 / dy2;
let nb = i * ny + (j - 1);
if is_dir(i, j - 1) {
rhs_data[idx] += (2.0 / dy2) * dval(nb);
} else {
rows.push(idx as i64);
cols.push(nb as i64);
vals.push(-2.0 / dy2);
}
rhs_data[idx] += 2.0 * g / dy;
} else {
add_neighbour(
i,
j - 1,
ny,
inv_dy2,
idx,
&is_dir,
&dval,
&mut rows,
&mut cols,
&mut vals,
rhs_data,
&mut center,
);
add_neighbour(
i,
0,
ny,
inv_dy2,
idx,
&is_dir,
&dval,
&mut rows,
&mut cols,
&mut vals,
rhs_data,
&mut center,
);
}
} else {
add_neighbour(
i,
j - 1,
ny,
inv_dy2,
idx,
&is_dir,
&dval,
&mut rows,
&mut cols,
&mut vals,
rhs_data,
&mut center,
);
add_neighbour(
i,
j + 1,
ny,
inv_dy2,
idx,
&is_dir,
&dval,
&mut rows,
&mut cols,
&mut vals,
rhs_data,
&mut center,
);
}
rows.push(idx as i64);
cols.push(idx as i64);
vals.push(center);
}
}
coo_to_csr_unsorted::<R>(&rows, &cols, &vals, [n, n], device)
}
fn coo_to_csr_unsorted<R: Runtime<DType = DType>>(
row_vec: &[i64],
col_vec: &[i64],
val_vec: &[f64],
shape: [usize; 2],
device: &R::Device,
) -> Result<CsrData<R>> {
let [nrows, _ncols] = shape;
let nnz = row_vec.len();
let mut order: Vec<usize> = (0..nnz).collect();
order.sort_by_key(|&k| (row_vec[k], col_vec[k]));
let mut sorted_rows = vec![0i64; nnz];
let mut sorted_cols = vec![0i64; nnz];
let mut sorted_vals = vec![0.0f64; nnz];
for (dst, &src) in order.iter().enumerate() {
sorted_rows[dst] = row_vec[src];
sorted_cols[dst] = col_vec[src];
sorted_vals[dst] = val_vec[src];
}
let mut row_ptrs = vec![0i64; nrows + 1];
for &r in &sorted_rows {
row_ptrs[r as usize + 1] += 1;
}
for i in 1..=nrows {
row_ptrs[i] += row_ptrs[i - 1];
}
debug_assert_eq!(row_ptrs[nrows] as usize, nnz);
let rp_tensor = Tensor::<R>::from_slice(&row_ptrs, &[nrows + 1], device);
let col_tensor = Tensor::<R>::from_slice(&sorted_cols, &[nnz], device);
let val_tensor = Tensor::<R>::from_slice(&sorted_vals, &[nnz], device);
CsrData::new(rp_tensor, col_tensor, val_tensor, shape)
}
fn coo_to_csr_sorted<R: Runtime<DType = DType>>(
row_indices: &Tensor<R>,
col_indices: &Tensor<R>,
values: &Tensor<R>,
shape: [usize; 2],
device: &R::Device,
) -> Result<CsrData<R>> {
let [nrows, _ncols] = shape;
let rows_vec: Vec<i64> = row_indices.to_vec();
let nnz = rows_vec.len();
let mut row_ptrs = vec![0i64; nrows + 1];
for &r in &rows_vec {
row_ptrs[r as usize + 1] += 1;
}
for i in 1..=nrows {
row_ptrs[i] += row_ptrs[i - 1];
}
debug_assert_eq!(row_ptrs[nrows] as usize, nnz);
let rp_tensor = Tensor::<R>::from_slice(&row_ptrs, &[nrows + 1], device);
CsrData::new(rp_tensor, col_indices.clone(), values.clone(), shape)
}