use crate::core::{
Image, CameraPose, CameraIntrinsics, Point3, Result, ColmapError
};
use nalgebra::{Matrix3, Point3 as NalgebraPoint3};
#[derive(Debug)]
pub struct StereoMatcher {
config: StereoMatchConfig,
}
#[derive(Debug, Clone)]
pub struct StereoMatchConfig {
pub max_disparity: i32,
pub window_size: usize,
pub cost_threshold: f32,
pub consistency_check: bool,
pub subpixel_refinement: bool,
}
impl Default for StereoMatchConfig {
fn default() -> Self {
Self {
max_disparity: 128,
window_size: 5,
cost_threshold: 0.1,
consistency_check: true,
subpixel_refinement: true,
}
}
}
#[derive(Debug, Clone)]
pub struct DisparityMap {
pub width: u32,
pub height: u32,
pub data: Vec<f32>,
}
#[derive(Debug, Clone)]
pub struct DepthMap {
pub width: u32,
pub height: u32,
pub data: Vec<f32>,
pub intrinsics: CameraIntrinsics,
pub pose: CameraPose,
}
#[derive(Debug)]
pub struct StereoMatchResult {
pub disparity_map: DisparityMap,
pub depth_map: DepthMap,
pub confidence: Vec<f32>,
}
impl StereoMatcher {
pub fn new(config: StereoMatchConfig) -> Self {
Self { config }
}
pub fn match_stereo(
&self,
left_image: &Image,
right_image: &Image,
left_intrinsics: &CameraIntrinsics,
right_intrinsics: &CameraIntrinsics,
left_pose: &CameraPose,
right_pose: &CameraPose,
) -> Result<StereoMatchResult> {
if left_image.size.0 != right_image.size.0 || left_image.size.1 != right_image.size.1 {
return Err(ColmapError::MvsReconstruction(
"Images must have the same dimensions".to_string()
));
}
let width = left_image.size.0;
let height = left_image.size.1;
let fundamental_matrix = self.compute_fundamental_matrix(
left_intrinsics, right_intrinsics, left_pose, right_pose
)?;
let (rectified_left, rectified_right, rectification_params) =
self.rectify_stereo_pair(
left_image, right_image,
left_intrinsics, right_intrinsics,
&fundamental_matrix
)?;
let disparity_map = self.compute_disparity_map(
&rectified_left, &rectified_right
)?;
let depth_map = self.disparity_to_depth(
&disparity_map,
&rectification_params,
left_intrinsics,
left_pose,
)?;
let confidence = self.compute_confidence(&disparity_map, &rectified_left, &rectified_right)?;
Ok(StereoMatchResult {
disparity_map,
depth_map,
confidence,
})
}
fn compute_fundamental_matrix(
&self,
left_intrinsics: &CameraIntrinsics,
right_intrinsics: &CameraIntrinsics,
left_pose: &CameraPose,
right_pose: &CameraPose,
) -> Result<Matrix3<f64>> {
let relative_rotation = right_pose.rotation.inverse() * left_pose.rotation;
let relative_translation = right_pose.rotation.inverse() *
(left_pose.translation - right_pose.translation);
let tx = relative_translation.x;
let ty = relative_translation.y;
let tz = relative_translation.z;
let skew_symmetric = Matrix3::new(
0.0, -tz, ty,
tz, 0.0, -tx,
-ty, tx, 0.0,
);
let essential_matrix = skew_symmetric * relative_rotation.to_rotation_matrix().matrix();
let k1_inv = left_intrinsics.matrix().try_inverse()
.ok_or_else(|| ColmapError::MvsReconstruction(
"Cannot invert left camera intrinsics matrix".to_string()
))?;
let k2_inv = right_intrinsics.matrix().try_inverse()
.ok_or_else(|| ColmapError::MvsReconstruction(
"Cannot invert right camera intrinsics matrix".to_string()
))?;
let fundamental_matrix = k2_inv.transpose() * essential_matrix * k1_inv;
Ok(fundamental_matrix)
}
fn rectify_stereo_pair(
&self,
left_image: &Image,
right_image: &Image,
left_intrinsics: &CameraIntrinsics,
_right_intrinsics: &CameraIntrinsics,
_fundamental_matrix: &Matrix3<f64>,
) -> Result<(Image, Image, RectificationParams)> {
let rectified_left = left_image.clone();
let rectified_right = right_image.clone();
let params = RectificationParams {
baseline: 0.1, focal_length: left_intrinsics.focal_length.0,
};
Ok((rectified_left, rectified_right, params))
}
fn compute_disparity_map(
&self,
left_image: &Image,
right_image: &Image,
) -> Result<DisparityMap> {
let width = left_image.size.0;
let height = left_image.size.1;
let mut disparity_data = vec![0.0; (width * height) as usize];
let half_window = self.config.window_size / 2;
for y in half_window..(height as usize - half_window) {
for x in half_window..(width as usize - half_window) {
let mut best_disparity = 0.0;
let mut min_cost = f32::MAX;
for d in 0..self.config.max_disparity {
let right_x = x as i32 - d;
if right_x < half_window as i32 ||
right_x >= (width as i32 - half_window as i32) {
continue;
}
let cost = self.compute_matching_cost(
left_image, right_image,
x, y, right_x as usize, y
);
if cost < min_cost {
min_cost = cost;
best_disparity = d as f32;
}
}
if self.config.subpixel_refinement {
best_disparity = self.refine_subpixel_disparity(
left_image, right_image, x, y, best_disparity
);
}
disparity_data[y * width as usize + x] = best_disparity;
}
}
Ok(DisparityMap {
width,
height,
data: disparity_data,
})
}
fn compute_matching_cost(
&self,
left_image: &Image,
right_image: &Image,
left_x: usize,
left_y: usize,
right_x: usize,
right_y: usize,
) -> f32 {
let half_window = self.config.window_size / 2;
let mut cost = 0.0f32;
let mut count = 0;
for dy in -(half_window as i32)..=(half_window as i32) {
for dx in -(half_window as i32)..=(half_window as i32) {
let ly = (left_y as i32 + dy) as usize;
let lx = (left_x as i32 + dx) as usize;
let ry = (right_y as i32 + dy) as usize;
let rx = (right_x as i32 + dx) as usize;
if ly < left_image.size.1 as usize && lx < left_image.size.0 as usize &&
ry < right_image.size.1 as usize && rx < right_image.size.0 as usize {
let left_intensity = 128.0f32; let right_intensity = 128.0f32;
cost += (left_intensity - right_intensity).abs();
count += 1;
}
}
}
if count > 0 {
cost / count as f32
} else {
f32::MAX
}
}
fn refine_subpixel_disparity(
&self,
left_image: &Image,
right_image: &Image,
x: usize,
y: usize,
disparity: f32,
) -> f32 {
let d = disparity as i32;
if d <= 0 || d >= self.config.max_disparity - 1 {
return disparity;
}
let cost_prev = self.compute_matching_cost(
left_image, right_image, x, y, (x as i32 - d + 1) as usize, y
);
let cost_curr = self.compute_matching_cost(
left_image, right_image, x, y, (x as i32 - d) as usize, y
);
let cost_next = self.compute_matching_cost(
left_image, right_image, x, y, (x as i32 - d - 1) as usize, y
);
let denom = 2.0 * (cost_prev + cost_next - 2.0 * cost_curr);
if denom.abs() > 1e-6 {
let offset = (cost_prev - cost_next) / denom;
disparity + offset.clamp(-1.0, 1.0)
} else {
disparity
}
}
fn disparity_to_depth(
&self,
disparity_map: &DisparityMap,
rectification_params: &RectificationParams,
intrinsics: &CameraIntrinsics,
pose: &CameraPose,
) -> Result<DepthMap> {
let mut depth_data = vec![0.0; disparity_map.data.len()];
for (i, &disparity) in disparity_map.data.iter().enumerate() {
if disparity > 0.0 {
let depth = rectification_params.baseline * rectification_params.focal_length / disparity as f64;
depth_data[i] = depth as f32;
}
}
Ok(DepthMap {
width: disparity_map.width,
height: disparity_map.height,
data: depth_data,
intrinsics: intrinsics.clone(),
pose: pose.clone(),
})
}
fn compute_confidence(
&self,
disparity_map: &DisparityMap,
left_image: &Image,
right_image: &Image,
) -> Result<Vec<f32>> {
let mut confidence = vec![0.0; disparity_map.data.len()];
for (i, &disparity) in disparity_map.data.iter().enumerate() {
if disparity > 0.0 {
confidence[i] = if disparity > self.config.cost_threshold {
1.0 - (disparity / self.config.max_disparity as f32)
} else {
0.0
};
}
}
Ok(confidence)
}
}
#[derive(Debug, Clone)]
pub struct RectificationParams {
pub baseline: f64,
pub focal_length: f64,
}
impl DepthMap {
pub fn get_depth(&self, x: u32, y: u32) -> Option<f32> {
if x < self.width && y < self.height {
let index = (y * self.width + x) as usize;
let depth = self.data[index];
if depth > 0.0 {
Some(depth)
} else {
None
}
} else {
None
}
}
pub fn to_point_cloud(&self) -> Vec<Point3> {
let mut points = Vec::new();
for y in 0..self.height {
for x in 0..self.width {
if let Some(depth) = self.get_depth(x, y) {
let x_norm = (x as f64 - self.intrinsics.principal_point.0) / self.intrinsics.focal_length.0;
let y_norm = (y as f64 - self.intrinsics.principal_point.1) / self.intrinsics.focal_length.1;
let point_camera = NalgebraPoint3::new(
x_norm * depth as f64,
y_norm * depth as f64,
depth as f64,
);
let point_world: nalgebra::Point3<f64> = (self.pose.rotation.inverse() *
(point_camera - nalgebra::Point3::<f64>::from(self.pose.translation))).into();
points.push(Point3::new(point_world.x, point_world.y, point_world.z));
}
}
}
points
}
}