use crate::core::{
Image, CameraPose, CameraIntrinsics, Point2, Point3, Result
};
use crate::mvs::stereo::DepthMap;
use nalgebra::{Vector3, Point3 as NalgebraPoint3};
use rand::Rng;
#[derive(Debug)]
pub struct DepthEstimator {
config: DepthEstimationConfig,
}
#[derive(Debug, Clone)]
pub struct DepthEstimationConfig {
pub patchmatch_iterations: usize,
pub window_size: usize,
pub min_depth: f32,
pub max_depth: f32,
pub depth_samples: usize,
pub normal_samples: usize,
pub propagation_radius: usize,
pub random_search_range: f32,
}
impl Default for DepthEstimationConfig {
fn default() -> Self {
Self {
patchmatch_iterations: 3,
window_size: 5,
min_depth: 0.1,
max_depth: 100.0,
depth_samples: 64,
normal_samples: 16,
propagation_radius: 1,
random_search_range: 0.5,
}
}
}
#[derive(Debug, Clone)]
pub struct DepthHypothesis {
pub depth: f32,
pub normal: Vector3<f32>,
pub cost: f32,
}
#[derive(Debug)]
pub struct MultiViewDepthResult {
pub depth_map: DepthMap,
pub normal_map: NormalMap,
pub cost_map: CostMap,
}
#[derive(Debug, Clone)]
pub struct NormalMap {
pub width: u32,
pub height: u32,
pub data: Vec<Vector3<f32>>,
}
#[derive(Debug, Clone)]
pub struct CostMap {
pub width: u32,
pub height: u32,
pub data: Vec<f32>,
}
#[derive(Debug, Clone)]
pub struct ViewInfo {
pub image: Image,
pub intrinsics: CameraIntrinsics,
pub pose: CameraPose,
}
impl DepthEstimator {
pub fn new(config: DepthEstimationConfig) -> Self {
Self { config }
}
pub fn estimate_depth(
&self,
reference_view: &ViewInfo,
source_views: &[ViewInfo],
) -> Result<MultiViewDepthResult> {
let width = reference_view.image.size.0;
let height = reference_view.image.size.1;
let mut depth_hypotheses = self.initialize_depth_hypotheses(width, height)?;
for iteration in 0..self.config.patchmatch_iterations {
println!("PatchMatch iteration {}/{}", iteration + 1, self.config.patchmatch_iterations);
self.propagation_step(
&mut depth_hypotheses,
reference_view,
source_views,
width,
height,
)?;
self.random_search_step(
&mut depth_hypotheses,
reference_view,
source_views,
width,
height,
)?;
}
self.post_process_depth(&mut depth_hypotheses, width, height)?;
let depth_map = self.build_depth_map(&depth_hypotheses, reference_view, width, height)?;
let normal_map = self.build_normal_map(&depth_hypotheses, width, height);
let cost_map = self.build_cost_map(&depth_hypotheses, width, height);
Ok(MultiViewDepthResult {
depth_map,
normal_map,
cost_map,
})
}
fn initialize_depth_hypotheses(
&self,
width: u32,
height: u32,
) -> Result<Vec<Vec<DepthHypothesis>>> {
let mut rng = rand::thread_rng();
let mut hypotheses = Vec::with_capacity(height as usize);
for _y in 0..height {
let mut row = Vec::with_capacity(width as usize);
for _x in 0..width {
let depth = rng.gen_range(self.config.min_depth..self.config.max_depth);
let normal = self.random_normal(&mut rng);
row.push(DepthHypothesis {
depth,
normal,
cost: f32::MAX,
});
}
hypotheses.push(row);
}
Ok(hypotheses)
}
fn random_normal(&self, rng: &mut impl Rng) -> Vector3<f32> {
loop {
let x = rng.gen_range(-1.0..1.0);
let y = rng.gen_range(-1.0..1.0);
let z = rng.gen_range(-1.0..1.0);
let normal = Vector3::<f32>::new(x as f32, y as f32, z as f32);
let norm = normal.norm();
if norm > 0.1 {
return normal / norm;
}
}
}
fn propagation_step(
&self,
hypotheses: &mut Vec<Vec<DepthHypothesis>>,
reference_view: &ViewInfo,
source_views: &[ViewInfo],
width: u32,
height: u32,
) -> Result<()> {
let directions = [(0, -1), (-1, 0), (1, 0), (0, 1)];
for y in 0..height as usize {
for x in 0..width as usize {
let mut best_hypothesis = hypotheses[y][x].clone();
if best_hypothesis.cost == f32::MAX {
best_hypothesis.cost = self.compute_hypothesis_cost(
&best_hypothesis,
x, y,
reference_view,
source_views,
)?;
}
for (dx, dy) in directions.iter() {
let nx = x as i32 + dx;
let ny = y as i32 + dy;
if nx >= 0 && nx < width as i32 && ny >= 0 && ny < height as i32 {
let neighbor_hypothesis = &hypotheses[ny as usize][nx as usize];
let cost = self.compute_hypothesis_cost(
neighbor_hypothesis,
x, y,
reference_view,
source_views,
)?;
if cost < best_hypothesis.cost {
best_hypothesis = neighbor_hypothesis.clone();
best_hypothesis.cost = cost;
}
}
}
hypotheses[y][x] = best_hypothesis;
}
}
Ok(())
}
fn random_search_step(
&self,
hypotheses: &mut Vec<Vec<DepthHypothesis>>,
reference_view: &ViewInfo,
source_views: &[ViewInfo],
width: u32,
height: u32,
) -> Result<()> {
let mut rng = rand::thread_rng();
for y in 0..height as usize {
for x in 0..width as usize {
let current_hypothesis = &hypotheses[y][x];
let mut best_hypothesis = current_hypothesis.clone();
let mut search_range = self.config.random_search_range;
while search_range > 0.01 {
let depth_offset = rng.gen_range(-search_range..search_range);
let new_depth = (current_hypothesis.depth + depth_offset)
.clamp(self.config.min_depth, self.config.max_depth);
let normal_perturbation = self.random_normal(&mut rng) * search_range;
let new_normal = (current_hypothesis.normal + normal_perturbation).normalize();
let test_hypothesis = DepthHypothesis {
depth: new_depth,
normal: new_normal,
cost: f32::MAX,
};
let cost = self.compute_hypothesis_cost(
&test_hypothesis,
x, y,
reference_view,
source_views,
)?;
if cost < best_hypothesis.cost {
best_hypothesis = test_hypothesis;
best_hypothesis.cost = cost;
}
search_range *= 0.5;
}
hypotheses[y][x] = best_hypothesis;
}
}
Ok(())
}
fn compute_hypothesis_cost(
&self,
hypothesis: &DepthHypothesis,
x: usize,
y: usize,
reference_view: &ViewInfo,
source_views: &[ViewInfo],
) -> Result<f32> {
let point_3d = self.unproject_pixel(
x as f32, y as f32,
hypothesis.depth,
&reference_view.intrinsics,
&reference_view.pose,
)?;
let mut total_cost = 0.0;
let mut valid_views = 0;
for source_view in source_views {
if let Some(cost) = self.compute_view_cost(
&point_3d,
&hypothesis.normal,
x, y,
reference_view,
source_view,
)? {
total_cost += cost;
valid_views += 1;
}
}
if valid_views > 0 {
Ok(total_cost / valid_views as f32)
} else {
Ok(f32::MAX)
}
}
fn unproject_pixel(
&self,
x: f32,
y: f32,
depth: f32,
intrinsics: &CameraIntrinsics,
pose: &CameraPose,
) -> Result<Point3> {
let x_norm = (x - intrinsics.principal_point.0 as f32) / intrinsics.focal_length.0 as f32;
let y_norm = (y - intrinsics.principal_point.1 as f32) / intrinsics.focal_length.1 as f32;
let point_camera = NalgebraPoint3::new(
x_norm * depth,
y_norm * depth,
depth,
);
let point_camera_f64 = nalgebra::Point3::<f64>::new(
point_camera.x as f64,
point_camera.y as f64,
point_camera.z as f64,
);
let point_world = pose.rotation.inverse() *
(point_camera_f64.coords - pose.translation);
let point_world = nalgebra::Point3::from(point_world);
Ok(Point3::new(point_world.x, point_world.y, point_world.z))
}
fn compute_view_cost(
&self,
point_3d: &Point3,
normal: &Vector3<f32>,
ref_x: usize,
ref_y: usize,
reference_view: &ViewInfo,
source_view: &ViewInfo,
) -> Result<Option<f32>> {
let projected = self.project_point(
point_3d,
&source_view.intrinsics,
&source_view.pose,
)?;
let src_x = projected.x as usize;
let src_y = projected.y as usize;
if src_x >= source_view.image.size.0 as usize ||
src_y >= source_view.image.size.1 as usize {
return Ok(None);
}
let ncc = self.compute_ncc(
&reference_view.image, ref_x, ref_y,
&source_view.image, src_x, src_y,
)?;
Ok(Some(1.0 - ncc))
}
fn project_point(
&self,
point_3d: &Point3,
intrinsics: &CameraIntrinsics,
pose: &CameraPose,
) -> Result<Point2> {
let point_world = NalgebraPoint3::new(point_3d.x, point_3d.y, point_3d.z);
let point_camera = pose.rotation * point_world + pose.translation;
let x = intrinsics.focal_length.0 * (point_camera.x / point_camera.z) + intrinsics.principal_point.0;
let y = intrinsics.focal_length.1 * (point_camera.y / point_camera.z) + intrinsics.principal_point.1;
Ok(Point2::new(x, y))
}
fn compute_ncc(
&self,
ref_image: &Image,
ref_x: usize,
ref_y: usize,
src_image: &Image,
src_x: usize,
src_y: usize,
) -> Result<f32> {
let half_window = self.config.window_size / 2;
let mut ref_sum = 0.0;
let mut src_sum = 0.0;
let mut count = 0;
for dy in -(half_window as i32)..=(half_window as i32) {
for dx in -(half_window as i32)..=(half_window as i32) {
let ry = ref_y as i32 + dy;
let rx = ref_x as i32 + dx;
let sy = src_y as i32 + dy;
let sx = src_x as i32 + dx;
if ry >= 0 && ry < ref_image.size.1 as i32 &&
rx >= 0 && rx < ref_image.size.0 as i32 &&
sy >= 0 && sy < src_image.size.1 as i32 &&
sx >= 0 && sx < src_image.size.0 as i32 {
let ref_intensity = 128.0; let src_intensity = 128.0;
ref_sum += ref_intensity;
src_sum += src_intensity;
count += 1;
}
}
}
if count == 0 {
return Ok(0.0);
}
let ref_mean = ref_sum / count as f32;
let src_mean = src_sum / count as f32;
let mut numerator = 0.0;
let mut ref_variance = 0.0;
let mut src_variance = 0.0;
for dy in -(half_window as i32)..=(half_window as i32) {
for dx in -(half_window as i32)..=(half_window as i32) {
let ry = ref_y as i32 + dy;
let rx = ref_x as i32 + dx;
let sy = src_y as i32 + dy;
let sx = src_x as i32 + dx;
if ry >= 0 && ry < ref_image.size.1 as i32 &&
rx >= 0 && rx < ref_image.size.0 as i32 &&
sy >= 0 && sy < src_image.size.1 as i32 &&
sx >= 0 && sx < src_image.size.0 as i32 {
let ref_intensity = 128.0; let src_intensity = 128.0;
let ref_diff = ref_intensity - ref_mean;
let src_diff = src_intensity - src_mean;
numerator += ref_diff * src_diff;
ref_variance += ref_diff * ref_diff;
src_variance += src_diff * src_diff;
}
}
}
let denominator = (ref_variance * src_variance).sqrt();
if denominator > 1e-6 {
Ok((numerator / denominator).clamp(-1.0, 1.0))
} else {
Ok(0.0)
}
}
fn post_process_depth(
&self,
hypotheses: &mut Vec<Vec<DepthHypothesis>>,
width: u32,
height: u32,
) -> Result<()> {
self.median_filter(hypotheses, width, height)?;
self.bilateral_filter(hypotheses, width, height)?;
Ok(())
}
fn median_filter(
&self,
hypotheses: &mut Vec<Vec<DepthHypothesis>>,
width: u32,
height: u32,
) -> Result<()> {
let mut filtered = hypotheses.clone();
for y in 1..(height as usize - 1) {
for x in 1..(width as usize - 1) {
let mut depths = Vec::new();
for dy in -1..=1 {
for dx in -1..=1 {
let ny = (y as i32 + dy) as usize;
let nx = (x as i32 + dx) as usize;
depths.push(hypotheses[ny][nx].depth);
}
}
depths.sort_by(|a, b| a.partial_cmp(b).unwrap());
filtered[y][x].depth = depths[depths.len() / 2];
}
}
*hypotheses = filtered;
Ok(())
}
fn bilateral_filter(
&self,
_hypotheses: &mut Vec<Vec<DepthHypothesis>>,
_width: u32,
_height: u32,
) -> Result<()> {
Ok(())
}
fn build_depth_map(
&self,
hypotheses: &Vec<Vec<DepthHypothesis>>,
reference_view: &ViewInfo,
width: u32,
height: u32,
) -> Result<DepthMap> {
let mut depth_data = Vec::with_capacity((width * height) as usize);
for y in 0..height as usize {
for x in 0..width as usize {
depth_data.push(hypotheses[y][x].depth);
}
}
Ok(DepthMap {
width,
height,
data: depth_data,
intrinsics: reference_view.intrinsics.clone(),
pose: reference_view.pose.clone(),
})
}
fn build_normal_map(
&self,
hypotheses: &Vec<Vec<DepthHypothesis>>,
width: u32,
height: u32,
) -> NormalMap {
let mut normal_data = Vec::with_capacity((width * height) as usize);
for y in 0..height as usize {
for x in 0..width as usize {
normal_data.push(hypotheses[y][x].normal);
}
}
NormalMap {
width,
height,
data: normal_data,
}
}
fn build_cost_map(
&self,
hypotheses: &Vec<Vec<DepthHypothesis>>,
width: u32,
height: u32,
) -> CostMap {
let mut cost_data = Vec::with_capacity((width * height) as usize);
for y in 0..height as usize {
for x in 0..width as usize {
cost_data.push(hypotheses[y][x].cost);
}
}
CostMap {
width,
height,
data: cost_data,
}
}
}