use crate::error::{AlgorithmError, Result};
pub const D8_NONE: u8 = 0;
pub const D8_EAST: u8 = 1;
pub const D8_SOUTHEAST: u8 = 2;
pub const D8_SOUTH: u8 = 4;
pub const D8_SOUTHWEST: u8 = 8;
pub const D8_WEST: u8 = 16;
pub const D8_NORTHWEST: u8 = 32;
pub const D8_NORTH: u8 = 64;
pub const D8_NORTHEAST: u8 = 128;
pub fn flow_direction_d8_simd(
dem: &[f32],
flow_dir: &mut [u8],
width: usize,
height: usize,
) -> Result<()> {
if width < 3 || height < 3 {
return Err(AlgorithmError::InvalidParameter {
parameter: "dimensions",
message: "Width and height must be at least 3".to_string(),
});
}
if dem.len() != width * height || flow_dir.len() != width * height {
return Err(AlgorithmError::InvalidParameter {
parameter: "buffer_size",
message: "Buffer sizes must match width * height".to_string(),
});
}
const LANES: usize = 8;
let chunks = flow_dir.len() / LANES;
for i in 0..chunks {
let start = i * LANES;
let end = start + LANES;
for j in start..end {
flow_dir[j] = D8_NONE;
}
}
let remainder_start = chunks * LANES;
for i in remainder_start..flow_dir.len() {
flow_dir[i] = D8_NONE;
}
let neighbors = [
(1, 0, D8_EAST),
(1, 1, D8_SOUTHEAST),
(0, 1, D8_SOUTH),
(-1, 1, D8_SOUTHWEST),
(-1, 0, D8_WEST),
(-1, -1, D8_NORTHWEST),
(0, -1, D8_NORTH),
(1, -1, D8_NORTHEAST),
];
let sqrt2 = 2.0_f32.sqrt();
for y in 1..(height - 1) {
for x in 1..(width - 1) {
let idx = y * width + x;
let center_elev = dem[idx];
let mut max_slope = f32::NEG_INFINITY;
let mut best_dir = D8_NONE;
for &(dx, dy, code) in &neighbors {
let nx = (x as i64 + dx) as usize;
let ny = (y as i64 + dy) as usize;
let neighbor_idx = ny * width + nx;
let neighbor_elev = dem[neighbor_idx];
let distance = if dx.abs() + dy.abs() == 2 {
sqrt2 } else {
1.0 };
let slope = (center_elev - neighbor_elev) / distance;
if slope > max_slope {
max_slope = slope;
best_dir = code;
}
}
flow_dir[idx] = best_dir;
}
}
Ok(())
}
pub fn detect_sinks_simd(dem: &[f32], sinks: &mut [u8], width: usize, height: usize) -> Result<()> {
if width < 3 || height < 3 {
return Err(AlgorithmError::InvalidParameter {
parameter: "dimensions",
message: "Width and height must be at least 3".to_string(),
});
}
if dem.len() != width * height || sinks.len() != width * height {
return Err(AlgorithmError::InvalidParameter {
parameter: "buffer_size",
message: "Buffer sizes must match width * height".to_string(),
});
}
const LANES: usize = 8;
let chunks = sinks.len() / LANES;
for i in 0..chunks {
let start = i * LANES;
let end = start + LANES;
for j in start..end {
sinks[j] = 0;
}
}
let remainder_start = chunks * LANES;
for i in remainder_start..sinks.len() {
sinks[i] = 0;
}
for y in 1..(height - 1) {
for x in 1..(width - 1) {
let idx = y * width + x;
let center_elev = dem[idx];
let mut is_sink = true;
for dy in -1..=1_i64 {
for dx in -1..=1_i64 {
if dx == 0 && dy == 0 {
continue;
}
let nx = (x as i64 + dx) as usize;
let ny = (y as i64 + dy) as usize;
let neighbor_elev = dem[ny * width + nx];
if center_elev >= neighbor_elev {
is_sink = false;
break;
}
}
if !is_sink {
break;
}
}
if is_sink {
sinks[idx] = 1;
}
}
}
Ok(())
}
pub fn compute_slope_simd(
dem: &[f32],
slope: &mut [f32],
width: usize,
height: usize,
) -> Result<()> {
if width < 3 || height < 3 {
return Err(AlgorithmError::InvalidParameter {
parameter: "dimensions",
message: "Width and height must be at least 3".to_string(),
});
}
if dem.len() != width * height || slope.len() != width * height {
return Err(AlgorithmError::InvalidParameter {
parameter: "buffer_size",
message: "Buffer sizes must match width * height".to_string(),
});
}
let sqrt2 = 2.0_f32.sqrt();
for y in 1..(height - 1) {
for x in 1..(width - 1) {
let idx = y * width + x;
let center_elev = dem[idx];
let mut max_slope = 0.0_f32;
for dy in -1..=1_i64 {
for dx in -1..=1_i64 {
if dx == 0 && dy == 0 {
continue;
}
let nx = (x as i64 + dx) as usize;
let ny = (y as i64 + dy) as usize;
let neighbor_elev = dem[ny * width + nx];
let distance = if dx.abs() + dy.abs() == 2 { sqrt2 } else { 1.0 };
let s = (center_elev - neighbor_elev) / distance;
max_slope = max_slope.max(s);
}
}
slope[idx] = max_slope;
}
}
for x in 0..width {
slope[x] = slope[width + x];
slope[(height - 1) * width + x] = slope[(height - 2) * width + x];
}
for y in 0..height {
slope[y * width] = slope[y * width + 1];
slope[y * width + width - 1] = slope[y * width + width - 2];
}
Ok(())
}
pub fn initialize_flow_accumulation_simd(flow_acc: &mut [f32]) -> Result<()> {
if flow_acc.is_empty() {
return Err(AlgorithmError::InvalidParameter {
parameter: "flow_acc",
message: "Buffer must not be empty".to_string(),
});
}
const LANES: usize = 8;
let chunks = flow_acc.len() / LANES;
for i in 0..chunks {
let start = i * LANES;
let end = start + LANES;
for j in start..end {
flow_acc[j] = 1.0;
}
}
let remainder_start = chunks * LANES;
for i in remainder_start..flow_acc.len() {
flow_acc[i] = 1.0;
}
Ok(())
}
pub fn detect_flats_simd(
dem: &[f32],
flat: &mut [u8],
width: usize,
height: usize,
tolerance: f32,
) -> Result<()> {
if width < 3 || height < 3 {
return Err(AlgorithmError::InvalidParameter {
parameter: "dimensions",
message: "Width and height must be at least 3".to_string(),
});
}
if dem.len() != width * height || flat.len() != width * height {
return Err(AlgorithmError::InvalidParameter {
parameter: "buffer_size",
message: "Buffer sizes must match width * height".to_string(),
});
}
const LANES: usize = 8;
let chunks = flat.len() / LANES;
for i in 0..chunks {
let start = i * LANES;
let end = start + LANES;
for j in start..end {
flat[j] = 0;
}
}
let remainder_start = chunks * LANES;
for i in remainder_start..flat.len() {
flat[i] = 0;
}
for y in 1..(height - 1) {
for x in 1..(width - 1) {
let idx = y * width + x;
let center_elev = dem[idx];
let mut is_flat = true;
for dy in -1..=1_i64 {
for dx in -1..=1_i64 {
if dx == 0 && dy == 0 {
continue;
}
let nx = (x as i64 + dx) as usize;
let ny = (y as i64 + dy) as usize;
let neighbor_elev = dem[ny * width + nx];
if (center_elev - neighbor_elev).abs() > tolerance {
is_flat = false;
break;
}
}
if !is_flat {
break;
}
}
if is_flat {
flat[idx] = 1;
}
}
}
Ok(())
}
pub fn flow_accumulation_iterative_simd(
flow_dir: &[u8],
flow_acc: &mut [f32],
width: usize,
height: usize,
max_iterations: usize,
) -> Result<()> {
if width < 3 || height < 3 {
return Err(AlgorithmError::InvalidParameter {
parameter: "dimensions",
message: "Width and height must be at least 3".to_string(),
});
}
if flow_dir.len() != width * height || flow_acc.len() != width * height {
return Err(AlgorithmError::InvalidParameter {
parameter: "buffer_size",
message: "Buffer sizes must match width * height".to_string(),
});
}
let dir_offsets: [(i64, i64, u8); 8] = [
(1, 0, D8_EAST),
(1, 1, D8_SOUTHEAST),
(0, 1, D8_SOUTH),
(-1, 1, D8_SOUTHWEST),
(-1, 0, D8_WEST),
(-1, -1, D8_NORTHWEST),
(0, -1, D8_NORTH),
(1, -1, D8_NORTHEAST),
];
let mut temp_acc = vec![0.0_f32; width * height];
for _ in 0..max_iterations {
for val in temp_acc.iter_mut() {
*val = 1.0;
}
for y in 1..(height - 1) {
for x in 1..(width - 1) {
let idx = y * width + x;
let center_dir = flow_dir[idx];
for &(dx, dy, code) in &dir_offsets {
if center_dir == code {
let tx = (x as i64 + dx) as usize;
let ty = (y as i64 + dy) as usize;
if tx > 0 && tx < width - 1 && ty > 0 && ty < height - 1 {
let target_idx = ty * width + tx;
temp_acc[target_idx] += flow_acc[idx];
}
break;
}
}
}
}
let mut converged = true;
for i in 0..(width * height) {
if (temp_acc[i] - flow_acc[i]).abs() > 0.001 {
converged = false;
}
flow_acc[i] = temp_acc[i];
}
if converged {
break;
}
}
Ok(())
}
pub fn count_upstream_cells_simd(
flow_dir: &[u8],
upstream_count: &mut [u32],
width: usize,
height: usize,
) -> Result<()> {
if width < 3 || height < 3 {
return Err(AlgorithmError::InvalidParameter {
parameter: "dimensions",
message: "Width and height must be at least 3".to_string(),
});
}
if flow_dir.len() != width * height || upstream_count.len() != width * height {
return Err(AlgorithmError::InvalidParameter {
parameter: "buffer_size",
message: "Buffer sizes must match width * height".to_string(),
});
}
for val in upstream_count.iter_mut() {
*val = 0;
}
let dir_offsets: [(i64, i64, u8); 8] = [
(1, 0, D8_EAST),
(1, 1, D8_SOUTHEAST),
(0, 1, D8_SOUTH),
(-1, 1, D8_SOUTHWEST),
(-1, 0, D8_WEST),
(-1, -1, D8_NORTHWEST),
(0, -1, D8_NORTH),
(1, -1, D8_NORTHEAST),
];
for y in 1..(height - 1) {
for x in 1..(width - 1) {
let idx = y * width + x;
let center_dir = flow_dir[idx];
for &(dx, dy, code) in &dir_offsets {
if center_dir == code {
let tx = (x as i64 + dx) as usize;
let ty = (y as i64 + dy) as usize;
if tx > 0 && tx < width - 1 && ty > 0 && ty < height - 1 {
let target_idx = ty * width + tx;
upstream_count[target_idx] += 1;
}
break;
}
}
}
}
Ok(())
}
pub fn extract_streams_simd(
flow_acc: &[f32],
streams: &mut [u8],
width: usize,
height: usize,
threshold: f32,
) -> Result<()> {
if flow_acc.len() != width * height || streams.len() != width * height {
return Err(AlgorithmError::InvalidParameter {
parameter: "buffer_size",
message: "Buffer sizes must match width * height".to_string(),
});
}
const LANES: usize = 8;
let chunks = flow_acc.len() / LANES;
for chunk in 0..chunks {
let start = chunk * LANES;
let end = start + LANES;
for i in start..end {
streams[i] = if flow_acc[i] >= threshold { 1 } else { 0 };
}
}
let remainder_start = chunks * LANES;
for i in remainder_start..flow_acc.len() {
streams[i] = if flow_acc[i] >= threshold { 1 } else { 0 };
}
Ok(())
}
pub fn fill_sinks_simple_simd(
dem: &mut [f32],
width: usize,
height: usize,
epsilon: f32,
) -> Result<()> {
if width < 3 || height < 3 {
return Err(AlgorithmError::InvalidParameter {
parameter: "dimensions",
message: "Width and height must be at least 3".to_string(),
});
}
if dem.len() != width * height {
return Err(AlgorithmError::InvalidParameter {
parameter: "buffer_size",
message: "Buffer size must match width * height".to_string(),
});
}
let max_iterations = (width + height) * 2;
let mut changed = true;
let mut iteration = 0;
while changed && iteration < max_iterations {
changed = false;
iteration += 1;
for y in 1..(height - 1) {
for x in 1..(width - 1) {
let idx = y * width + x;
let center_elev = dem[idx];
let mut min_neighbor = f32::INFINITY;
for dy in -1..=1_i64 {
for dx in -1..=1_i64 {
if dx == 0 && dy == 0 {
continue;
}
let nx = (x as i64 + dx) as usize;
let ny = (y as i64 + dy) as usize;
min_neighbor = min_neighbor.min(dem[ny * width + nx]);
}
}
if center_elev < min_neighbor {
dem[idx] = min_neighbor + epsilon;
changed = true;
}
}
}
}
Ok(())
}
pub fn compute_twi_simd(
flow_acc: &[f32],
slope: &[f32],
twi: &mut [f32],
width: usize,
height: usize,
cell_size: f32,
) -> Result<()> {
if flow_acc.len() != width * height
|| slope.len() != width * height
|| twi.len() != width * height
{
return Err(AlgorithmError::InvalidParameter {
parameter: "buffer_size",
message: "Buffer sizes must match width * height".to_string(),
});
}
const LANES: usize = 8;
let chunks = flow_acc.len() / LANES;
for chunk in 0..chunks {
let start = chunk * LANES;
let end = start + LANES;
for i in start..end {
let sca = flow_acc[i] * cell_size;
let slope_rad = slope[i].to_radians();
let tan_slope = slope_rad.tan().max(0.0001);
twi[i] = (sca / tan_slope).ln();
}
}
let remainder_start = chunks * LANES;
for i in remainder_start..flow_acc.len() {
let sca = flow_acc[i] * cell_size;
let slope_rad = slope[i].to_radians();
let tan_slope = slope_rad.tan().max(0.0001);
twi[i] = (sca / tan_slope).ln();
}
Ok(())
}
pub fn compute_spi_simd(
flow_acc: &[f32],
slope: &[f32],
spi: &mut [f32],
width: usize,
height: usize,
cell_size: f32,
) -> Result<()> {
if flow_acc.len() != width * height
|| slope.len() != width * height
|| spi.len() != width * height
{
return Err(AlgorithmError::InvalidParameter {
parameter: "buffer_size",
message: "Buffer sizes must match width * height".to_string(),
});
}
const LANES: usize = 8;
let chunks = flow_acc.len() / LANES;
for chunk in 0..chunks {
let start = chunk * LANES;
let end = start + LANES;
for i in start..end {
let sca = flow_acc[i] * cell_size;
let slope_rad = slope[i].to_radians();
let tan_slope = slope_rad.tan();
spi[i] = sca * tan_slope;
}
}
let remainder_start = chunks * LANES;
for i in remainder_start..flow_acc.len() {
let sca = flow_acc[i] * cell_size;
let slope_rad = slope[i].to_radians();
let tan_slope = slope_rad.tan();
spi[i] = sca * tan_slope;
}
Ok(())
}
pub fn compute_sti_simd(
flow_acc: &[f32],
slope: &[f32],
sti: &mut [f32],
width: usize,
height: usize,
cell_size: f32,
) -> Result<()> {
if flow_acc.len() != width * height
|| slope.len() != width * height
|| sti.len() != width * height
{
return Err(AlgorithmError::InvalidParameter {
parameter: "buffer_size",
message: "Buffer sizes must match width * height".to_string(),
});
}
let ref_length = 22.13_f32;
let ref_slope = 0.0896_f32;
const LANES: usize = 8;
let chunks = flow_acc.len() / LANES;
for chunk in 0..chunks {
let start = chunk * LANES;
let end = start + LANES;
for i in start..end {
let flow_length = flow_acc[i] * cell_size;
let slope_rad = slope[i].to_radians();
let sin_slope = slope_rad.sin().max(0.0001);
let length_factor = (flow_length / ref_length).powf(0.6);
let slope_factor = (sin_slope / ref_slope).powf(1.3);
sti[i] = length_factor * slope_factor;
}
}
let remainder_start = chunks * LANES;
for i in remainder_start..flow_acc.len() {
let flow_length = flow_acc[i] * cell_size;
let slope_rad = slope[i].to_radians();
let sin_slope = slope_rad.sin().max(0.0001);
let length_factor = (flow_length / ref_length).powf(0.6);
let slope_factor = (sin_slope / ref_slope).powf(1.3);
sti[i] = length_factor * slope_factor;
}
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_flow_direction_d8() {
let mut dem = vec![0.0_f32; 100];
for y in 0..10 {
for x in 0..10 {
dem[y * 10 + x] = x as f32;
}
}
let mut flow_dir = vec![0_u8; 100];
flow_direction_d8_simd(&dem, &mut flow_dir, 10, 10)
.expect("Failed to compute D8 flow direction");
for y in 1..9 {
for x in 1..9 {
let dir = flow_dir[y * 10 + x];
assert!(dir == D8_WEST || dir == D8_SOUTHWEST || dir == D8_NORTHWEST);
}
}
}
#[test]
fn test_detect_sinks() {
let mut dem = vec![100.0_f32; 100];
dem[55] = 50.0;
let mut sinks = vec![0_u8; 100];
detect_sinks_simd(&dem, &mut sinks, 10, 10).expect("Failed to detect sinks");
assert_eq!(sinks[55], 1);
}
#[test]
fn test_compute_slope() {
let dem = vec![100.0_f32; 100]; let mut slope = vec![0.0_f32; 100];
compute_slope_simd(&dem, &mut slope, 10, 10).expect("Failed to compute slope");
for &val in &slope[11..89] {
assert!(val < 0.01);
}
}
#[test]
fn test_initialize_flow_accumulation() {
let mut flow_acc = vec![0.0_f32; 100];
initialize_flow_accumulation_simd(&mut flow_acc)
.expect("Failed to initialize flow accumulation");
for &val in &flow_acc {
assert_eq!(val, 1.0);
}
}
#[test]
fn test_detect_flats() {
let dem = vec![100.0_f32; 100]; let mut flat = vec![0_u8; 100];
detect_flats_simd(&dem, &mut flat, 10, 10, 0.1).expect("Failed to detect flats");
for y in 1..9 {
for x in 1..9 {
assert_eq!(flat[y * 10 + x], 1);
}
}
}
#[test]
fn test_invalid_dimensions() {
let dem = vec![100.0_f32; 4]; let mut flow_dir = vec![0_u8; 4];
let result = flow_direction_d8_simd(&dem, &mut flow_dir, 2, 2);
assert!(result.is_err());
}
#[test]
fn test_buffer_size_mismatch() {
let dem = vec![100.0_f32; 100];
let mut flow_dir = vec![0_u8; 50];
let result = flow_direction_d8_simd(&dem, &mut flow_dir, 10, 10);
assert!(result.is_err());
}
#[test]
fn test_count_upstream_cells() {
let flow_dir = vec![D8_WEST; 100]; let mut upstream_count = vec![0_u32; 100];
count_upstream_cells_simd(&flow_dir, &mut upstream_count, 10, 10)
.expect("Failed to count upstream cells");
assert!(upstream_count.iter().any(|&v| v > 0));
}
#[test]
fn test_extract_streams() {
let mut flow_acc = vec![10.0_f32; 100];
flow_acc[55] = 100.0;
let mut streams = vec![0_u8; 100];
extract_streams_simd(&flow_acc, &mut streams, 10, 10, 50.0)
.expect("Failed to extract streams");
assert_eq!(streams[55], 1);
assert_eq!(streams[44], 0);
}
#[test]
fn test_fill_sinks_simple() {
let mut dem = vec![100.0_f32; 100];
dem[55] = 50.0;
fill_sinks_simple_simd(&mut dem, 10, 10, 0.001).expect("Failed to fill sinks");
assert!(dem[55] >= 100.0);
}
#[test]
fn test_compute_twi() {
let flow_acc = vec![10.0_f32; 100];
let slope = vec![10.0_f32; 100]; let mut twi = vec![0.0_f32; 100];
compute_twi_simd(&flow_acc, &slope, &mut twi, 10, 10, 30.0).expect("Failed to compute TWI");
for &val in &twi {
assert!(val.is_finite());
}
}
#[test]
fn test_compute_spi() {
let flow_acc = vec![10.0_f32; 100];
let slope = vec![10.0_f32; 100]; let mut spi = vec![0.0_f32; 100];
compute_spi_simd(&flow_acc, &slope, &mut spi, 10, 10, 30.0).expect("Failed to compute SPI");
for &val in &spi {
assert!(val.is_finite());
}
}
#[test]
fn test_compute_sti() {
let flow_acc = vec![10.0_f32; 100];
let slope = vec![10.0_f32; 100]; let mut sti = vec![0.0_f32; 100];
compute_sti_simd(&flow_acc, &slope, &mut sti, 10, 10, 30.0).expect("Failed to compute STI");
for &val in &sti {
assert!(val.is_finite() && val >= 0.0);
}
}
}