Skip to main content

oximedia_align/
stereo_depth.rs

1//! Depth estimation from a rectified stereo image pair.
2//!
3//! Given two horizontally-aligned images (i.e. the output of `stereo_rectify`),
4//! this module computes a dense disparity map using **block matching** (Sum of
5//! Absolute Differences) and then converts each disparity value to a physical
6//! depth via the standard stereo camera formula:
7//!
8//! ```text
9//! depth = (focal_length_px * baseline_m) / disparity
10//! ```
11//!
12//! # Design
13//!
14//! | Symbol | Meaning |
15//! |--------|---------|
16//! | `f`    | Focal length in pixels (same for both rectified cameras) |
17//! | `B`    | Baseline — distance between camera optical centres in metres |
18//! | `d`    | Disparity in pixels (left column minus best-matching right column) |
19//!
20//! The search is restricted to `[min_disparity, max_disparity)` pixels to the
21//! **left** of the corresponding point in the right image (standard convention
22//! for left-camera disparity).  Pixels for which no valid disparity is found
23//! (near the image border or where min_disparity == 0) receive `f32::INFINITY`.
24
25use crate::{AlignError, AlignResult};
26
27// ─── Configuration ────────────────────────────────────────────────────────────
28
29/// Configuration for the block-matching stereo depth estimator.
30#[derive(Debug, Clone)]
31pub struct StereoDepthConfig {
32    /// Side length (in pixels) of the square matching window.
33    ///
34    /// Must be odd and ≥ 1.  Typical values: 5, 7, 11.
35    pub block_size: usize,
36
37    /// Minimum disparity to search (inclusive).  Must be ≥ 0.
38    pub min_disparity: i32,
39
40    /// Maximum disparity to search (exclusive).  Must be > `min_disparity`.
41    pub max_disparity: i32,
42
43    /// Focal length of the rectified cameras in **pixels**.
44    pub focal_length_px: f64,
45
46    /// Distance between camera optical centres in **metres** (baseline).
47    pub baseline_m: f64,
48}
49
50impl StereoDepthConfig {
51    /// Returns `Err` if the configuration is self-contradictory.
52    ///
53    /// # Errors
54    ///
55    /// - `block_size` is zero or even.
56    /// - `min_disparity >= max_disparity`.
57    /// - `focal_length_px` or `baseline_m` are non-positive.
58    pub fn validate(&self) -> AlignResult<()> {
59        if self.block_size == 0 || self.block_size % 2 == 0 {
60            return Err(AlignError::InvalidConfig(
61                "block_size must be a positive odd number".to_string(),
62            ));
63        }
64        if self.min_disparity >= self.max_disparity {
65            return Err(AlignError::InvalidConfig(
66                "min_disparity must be less than max_disparity".to_string(),
67            ));
68        }
69        if self.focal_length_px <= 0.0 {
70            return Err(AlignError::InvalidConfig(
71                "focal_length_px must be positive".to_string(),
72            ));
73        }
74        if self.baseline_m <= 0.0 {
75            return Err(AlignError::InvalidConfig(
76                "baseline_m must be positive".to_string(),
77            ));
78        }
79        Ok(())
80    }
81}
82
83impl Default for StereoDepthConfig {
84    fn default() -> Self {
85        Self {
86            block_size: 7,
87            min_disparity: 0,
88            max_disparity: 64,
89            focal_length_px: 700.0,
90            baseline_m: 0.12,
91        }
92    }
93}
94
95// ─── Estimator ────────────────────────────────────────────────────────────────
96
97/// Stereo depth estimator using SAD (Sum of Absolute Differences) block matching.
98///
99/// Expects **rectified** grayscale image pairs — i.e. the horizontal epipolar
100/// lines of left and right images coincide, so the search reduces to a 1-D
101/// scan along each row.
102///
103/// # Example
104///
105/// ```rust
106/// use oximedia_align::stereo_depth::{StereoDepthConfig, StereoDepthEstimator};
107///
108/// let config = StereoDepthConfig {
109///     block_size: 5,
110///     min_disparity: 1,
111///     max_disparity: 16,
112///     focal_length_px: 500.0,
113///     baseline_m: 0.1,
114/// };
115/// let estimator = StereoDepthEstimator::new();
116///
117/// let width = 16usize;
118/// let height = 8usize;
119/// let left = vec![128u8; width * height];
120/// let right = vec![128u8; width * height];
121///
122/// // Uniform images → disparity 1 (min_disparity) for every pixel
123/// let depth = estimator
124///     .compute_depth_map(&left, &right, width, height, &config)
125///     .unwrap();
126/// assert_eq!(depth.len(), width * height);
127/// ```
128#[derive(Debug, Default)]
129pub struct StereoDepthEstimator;
130
131impl StereoDepthEstimator {
132    /// Creates a new `StereoDepthEstimator`.
133    #[must_use]
134    pub fn new() -> Self {
135        Self
136    }
137
138    /// Computes a dense depth map from a rectified stereo pair.
139    ///
140    /// # Parameters
141    ///
142    /// - `left`   – Row-major grayscale pixels (u8) of the **left** camera image.
143    /// - `right`  – Row-major grayscale pixels (u8) of the **right** camera image.
144    /// - `width`  – Image width in pixels.  Both images must have the same dimensions.
145    /// - `height` – Image height in pixels.
146    /// - `config` – Block matching and camera parameters.
147    ///
148    /// # Returns
149    ///
150    /// A `Vec<f32>` with `width * height` depth values in metres, stored in
151    /// row-major order.  Pixels for which depth cannot be determined are set to
152    /// `f32::INFINITY`.
153    ///
154    /// # Errors
155    ///
156    /// Returns [`AlignError::InvalidConfig`] if `config.validate()` fails, or
157    /// [`AlignError::InsufficientData`] if the images are too small to hold
158    /// even a single matching block.
159    pub fn compute_depth_map(
160        &self,
161        left: &[u8],
162        right: &[u8],
163        width: usize,
164        height: usize,
165        config: &StereoDepthConfig,
166    ) -> AlignResult<Vec<f32>> {
167        config.validate()?;
168
169        if left.len() != width * height || right.len() != width * height {
170            return Err(AlignError::InsufficientData(
171                "Image buffer length does not match width × height".to_string(),
172            ));
173        }
174
175        let half = config.block_size / 2;
176        let max_disp = config.max_disparity as usize;
177        let min_disp = config.min_disparity.max(0) as usize;
178
179        let mut depth_map = vec![f32::INFINITY; width * height];
180
181        for y in 0..height {
182            // Row range for the matching block (clipped to image bounds)
183            let row_start = y.saturating_sub(half);
184            let row_end = (y + half + 1).min(height);
185
186            for x in 0..width {
187                // Column range for the left patch
188                let col_start_l = x.saturating_sub(half);
189                let col_end_l = (x + half + 1).min(width);
190
191                let mut best_sad = u64::MAX;
192                let mut best_disp: Option<usize> = None;
193
194                // Search over candidate disparities
195                for d in min_disp..max_disp {
196                    // Shift right image patch to the left by d pixels
197                    // Right patch starts at col_start_l - d (saturated)
198                    let right_col_start = if col_start_l >= d {
199                        col_start_l - d
200                    } else {
201                        continue; // patch would go out of bounds
202                    };
203                    let right_col_end = if col_end_l > d {
204                        col_end_l - d
205                    } else {
206                        continue;
207                    };
208
209                    let mut sad: u64 = 0;
210                    let mut count: usize = 0;
211
212                    for row in row_start..row_end {
213                        let left_row = &left[row * width..];
214                        let right_row = &right[row * width..];
215
216                        let l_slice = &left_row[col_start_l..col_end_l];
217                        let r_slice = &right_row[right_col_start..right_col_end];
218
219                        // Number of overlapping columns
220                        let cols = l_slice.len().min(r_slice.len());
221                        for c in 0..cols {
222                            let diff =
223                                (i32::from(l_slice[c]) - i32::from(r_slice[c])).unsigned_abs();
224                            sad += u64::from(diff);
225                            count += 1;
226                        }
227                    }
228
229                    if count == 0 {
230                        continue;
231                    }
232
233                    // Normalise by area to make SAD comparable across border pixels
234                    let norm_sad = sad / count as u64;
235
236                    if norm_sad < best_sad {
237                        best_sad = norm_sad;
238                        best_disp = Some(d);
239                    }
240                }
241
242                if let Some(d) = best_disp {
243                    if d == 0 {
244                        // Zero disparity → infinite depth (point at infinity)
245                        depth_map[y * width + x] = f32::INFINITY;
246                    } else {
247                        let depth = (config.focal_length_px * config.baseline_m) / d as f64;
248                        depth_map[y * width + x] = depth as f32;
249                    }
250                }
251                // Else: remains INFINITY (no valid match found)
252            }
253        }
254
255        Ok(depth_map)
256    }
257}
258
259// ─── Tests ────────────────────────────────────────────────────────────────────
260
261#[cfg(test)]
262mod tests {
263    use super::*;
264
265    /// Helper: create a uniform gray image.
266    fn uniform_image(width: usize, height: usize, value: u8) -> Vec<u8> {
267        vec![value; width * height]
268    }
269
270    /// A disparity of `d` pixels means every left-image column is best matched
271    /// by the right-image column shifted by `d` to the left.  For a uniform
272    /// image the SAD is 0 for every candidate, so the first (minimum) disparity
273    /// `min_disparity` wins.  The expected depth is then:
274    ///
275    /// `depth = f * B / min_disparity`
276    #[test]
277    fn test_depth_map_uniform_disparity() {
278        let config = StereoDepthConfig {
279            block_size: 3,
280            min_disparity: 2,
281            max_disparity: 16,
282            focal_length_px: 400.0,
283            baseline_m: 0.1,
284        };
285        let estimator = StereoDepthEstimator::new();
286        let w = 24usize;
287        let h = 12usize;
288
289        let left = uniform_image(w, h, 100);
290        let right = uniform_image(w, h, 100);
291
292        let depth = estimator
293            .compute_depth_map(&left, &right, w, h, &config)
294            .expect("depth map should succeed");
295
296        assert_eq!(depth.len(), w * h);
297
298        // Expected: f * B / min_disparity = 400.0 * 0.1 / 2 = 20.0
299        let expected = (config.focal_length_px * config.baseline_m) / config.min_disparity as f64;
300
301        for (idx, &d) in depth.iter().enumerate() {
302            // Border pixels may be INFINITY (not enough block overlap)
303            if d.is_finite() {
304                assert!(
305                    (f64::from(d) - expected).abs() < 1.0,
306                    "pixel {idx}: depth {d} expected ~{expected}"
307                );
308            }
309        }
310    }
311
312    /// When `min_disparity == 0`, every pixel whose best match is at disparity 0
313    /// should receive `f32::INFINITY` (infinite depth / point at infinity).
314    #[test]
315    fn test_depth_map_zero_disparity_infinite_depth() {
316        let config = StereoDepthConfig {
317            block_size: 3,
318            min_disparity: 0, // allows zero disparity
319            max_disparity: 8,
320            focal_length_px: 500.0,
321            baseline_m: 0.2,
322        };
323        let estimator = StereoDepthEstimator::new();
324        let w = 20usize;
325        let h = 10usize;
326
327        // Identical images → best disparity is 0 → depth = INFINITY
328        let img = uniform_image(w, h, 128);
329        let depth = estimator
330            .compute_depth_map(&img, &img, w, h, &config)
331            .expect("depth map should succeed");
332
333        assert_eq!(depth.len(), w * h);
334
335        // Every valid pixel should be INFINITY when disparity is 0
336        for &d in &depth {
337            assert!(d.is_infinite(), "expected infinite depth, got {d}");
338        }
339    }
340
341    /// Validate that non-trivial disparity (shifted right image) is recovered.
342    ///
343    /// Construct a left image with a vertical stripe of bright pixels and a
344    /// right image with the same stripe shifted by exactly `shift` pixels.
345    #[test]
346    fn test_depth_map_known_shift() {
347        let config = StereoDepthConfig {
348            block_size: 3,
349            min_disparity: 1,
350            max_disparity: 10,
351            focal_length_px: 300.0,
352            baseline_m: 0.06,
353        };
354        let estimator = StereoDepthEstimator::new();
355        let w = 32usize;
356        let h = 16usize;
357        let shift: usize = 4;
358
359        let mut left = vec![50u8; w * h];
360        let mut right = vec![50u8; w * h];
361
362        // Bright stripe at column 12 in left; column 12 - shift in right
363        let stripe_col_l: usize = 12;
364        let stripe_col_r = stripe_col_l.saturating_sub(shift);
365        for row in 0..h {
366            left[row * w + stripe_col_l] = 200;
367            if stripe_col_r < w {
368                right[row * w + stripe_col_r] = 200;
369            }
370        }
371
372        let depth = estimator
373            .compute_depth_map(&left, &right, w, h, &config)
374            .expect("depth map should succeed");
375
376        // Near the stripe centre pixels should have depth ≈ f*B/shift
377        let expected = (config.focal_length_px * config.baseline_m) / shift as f64;
378        let half = config.block_size / 2;
379
380        for row in half..(h - half) {
381            let idx = row * w + stripe_col_l;
382            let d = depth[idx];
383            if d.is_finite() {
384                assert!(
385                    (f64::from(d) - expected).abs() < expected * 0.5,
386                    "row {row}: depth {d} expected ~{expected}"
387                );
388            }
389        }
390    }
391
392    #[test]
393    fn test_config_validation_rejects_even_block_size() {
394        let config = StereoDepthConfig {
395            block_size: 4,
396            ..StereoDepthConfig::default()
397        };
398        assert!(config.validate().is_err());
399    }
400
401    #[test]
402    fn test_config_validation_rejects_inverted_disparity_range() {
403        let config = StereoDepthConfig {
404            min_disparity: 10,
405            max_disparity: 5,
406            ..StereoDepthConfig::default()
407        };
408        assert!(config.validate().is_err());
409    }
410}