Skip to main content

scirs2_vision/
depth_processing.rs

1//! Depth Image Processing
2//!
3//! Utilities for converting depth images to point clouds, filling depth
4//! holes via a simple inward-propagation (fast-marching-style) scheme,
5//! bilateral filtering for edge-preserving depth smoothing, surface normal
6//! computation, and disparity-to-depth conversion.
7
8use crate::point_cloud::PointCloud;
9use scirs2_core::ndarray::{Array2, Array3};
10use std::collections::VecDeque;
11
12// ─────────────────────────────────────────────────────────────────────────────
13// depth_to_pointcloud
14// ─────────────────────────────────────────────────────────────────────────────
15
16/// Unproject a depth image into a 3D point cloud using pinhole camera
17/// intrinsics.
18///
19/// # Arguments
20/// * `depth`       – H×W depth image in **metres** (0.0 means invalid).
21/// * `fx`, `fy`    – Focal lengths in pixels.
22/// * `cx`, `cy`    – Principal point in pixels.
23/// * `depth_scale` – Multiply raw pixel values by this factor to get metres
24///   (use 1.0 when values are already in metres).
25///
26/// # Returns
27/// A [`PointCloud`] with one point per valid (non-zero) depth pixel.
28pub fn depth_to_pointcloud(
29    depth: &Array2<f32>,
30    fx: f64,
31    fy: f64,
32    cx: f64,
33    cy: f64,
34    depth_scale: f64,
35) -> PointCloud {
36    let (height, width) = depth.dim();
37    let mut pts: Vec<[f64; 3]> = Vec::with_capacity(height * width);
38
39    for row in 0..height {
40        for col in 0..width {
41            let raw = depth[[row, col]] as f64 * depth_scale;
42            if raw <= 0.0 || !raw.is_finite() {
43                continue;
44            }
45            let z = raw;
46            let x = (col as f64 - cx) * z / fx;
47            let y = (row as f64 - cy) * z / fy;
48            pts.push([x, y, z]);
49        }
50    }
51
52    PointCloud::from_vec(pts)
53}
54
55// ─────────────────────────────────────────────────────────────────────────────
56// fill_depth_holes
57// ─────────────────────────────────────────────────────────────────────────────
58
59/// Fill holes (zero-valued pixels) in a depth image using a
60/// fast-marching-inspired inward propagation.
61///
62/// Starting from all valid pixels adjacent to holes, values propagate
63/// inward using a distance-weighted average.  Only connected regions of
64/// zeros with an area ≤ `max_hole_size` pixels are filled.
65pub fn fill_depth_holes(depth: &Array2<f32>, max_hole_size: usize) -> Array2<f32> {
66    let (h, w) = depth.dim();
67    let mut output = depth.clone();
68
69    // Identify hole pixels (depth == 0) and label connected regions.
70    let mut visited = Array2::from_elem((h, w), false);
71    let neighbors4: [(i32, i32); 4] = [(-1, 0), (1, 0), (0, -1), (0, 1)];
72
73    for start_r in 0..h {
74        for start_c in 0..w {
75            if depth[[start_r, start_c]] > 0.0 || visited[[start_r, start_c]] {
76                continue;
77            }
78            // BFS to find the extent of this hole.
79            let mut region: Vec<(usize, usize)> = Vec::new();
80            let mut queue: VecDeque<(usize, usize)> = VecDeque::new();
81            queue.push_back((start_r, start_c));
82            visited[[start_r, start_c]] = true;
83
84            while let Some((r, c)) = queue.pop_front() {
85                region.push((r, c));
86                for &(dr, dc) in &neighbors4 {
87                    let nr = r as i32 + dr;
88                    let nc = c as i32 + dc;
89                    if nr < 0 || nr >= h as i32 || nc < 0 || nc >= w as i32 {
90                        continue;
91                    }
92                    let nr = nr as usize;
93                    let nc = nc as usize;
94                    if !visited[[nr, nc]] && depth[[nr, nc]] == 0.0 {
95                        visited[[nr, nc]] = true;
96                        queue.push_back((nr, nc));
97                    }
98                }
99            }
100
101            // Skip large holes.
102            if region.len() > max_hole_size {
103                continue;
104            }
105
106            // Fill each pixel with the weighted average of its valid neighbours
107            // within the original depth image.
108            for (r, c) in region {
109                let mut sum = 0.0f64;
110                let mut weight = 0.0f64;
111                for &(dr, dc) in &neighbors4 {
112                    let nr = r as i32 + dr;
113                    let nc = c as i32 + dc;
114                    if nr < 0 || nr >= h as i32 || nc < 0 || nc >= w as i32 {
115                        continue;
116                    }
117                    let v = depth[[nr as usize, nc as usize]];
118                    if v > 0.0 {
119                        sum += v as f64;
120                        weight += 1.0;
121                    }
122                }
123                if weight > 0.0 {
124                    output[[r, c]] = (sum / weight) as f32;
125                }
126            }
127        }
128    }
129
130    output
131}
132
133// ─────────────────────────────────────────────────────────────────────────────
134// bilateral_filter_depth
135// ─────────────────────────────────────────────────────────────────────────────
136
137/// Bilateral filter for edge-preserving depth-image smoothing.
138///
139/// Each output pixel is a Gaussian-weighted average of its spatial
140/// neighbourhood where the weight is further attenuated by the depth
141/// difference (colour term).  Zero-valued (invalid) pixels are excluded from
142/// the computation.
143///
144/// # Arguments
145/// * `depth`        – H×W depth image (0 means invalid).
146/// * `d`            – Filter diameter (the kernel extends ±(d/2) pixels).
147/// * `sigma_color`  – Standard deviation of the intensity (depth) Gaussian.
148/// * `sigma_space`  – Standard deviation of the spatial Gaussian.
149pub fn bilateral_filter_depth(
150    depth: &Array2<f32>,
151    d: usize,
152    sigma_color: f64,
153    sigma_space: f64,
154) -> Array2<f32> {
155    let (h, w) = depth.dim();
156    let mut output = depth.clone();
157
158    if d == 0 || sigma_color <= 0.0 || sigma_space <= 0.0 {
159        return output;
160    }
161
162    let radius = (d / 2) as i32;
163    let inv_2_sc2 = 1.0 / (2.0 * sigma_color * sigma_color);
164    let inv_2_ss2 = 1.0 / (2.0 * sigma_space * sigma_space);
165
166    for row in 0..h {
167        for col in 0..w {
168            let center = depth[[row, col]];
169            if center == 0.0 {
170                continue;
171            }
172            let mut sum = 0.0f64;
173            let mut weight_total = 0.0f64;
174
175            for dr in -radius..=radius {
176                for dc in -radius..=radius {
177                    let nr = row as i32 + dr;
178                    let nc = col as i32 + dc;
179                    if nr < 0 || nr >= h as i32 || nc < 0 || nc >= w as i32 {
180                        continue;
181                    }
182                    let v = depth[[nr as usize, nc as usize]];
183                    if v == 0.0 {
184                        continue;
185                    }
186                    let color_diff = (v as f64 - center as f64).powi(2);
187                    let space_dist = (dr * dr + dc * dc) as f64;
188                    let w_c = (-color_diff * inv_2_sc2).exp();
189                    let w_s = (-space_dist * inv_2_ss2).exp();
190                    let w = w_c * w_s;
191                    sum += v as f64 * w;
192                    weight_total += w;
193                }
194            }
195
196            if weight_total > 1e-12 {
197                output[[row, col]] = (sum / weight_total) as f32;
198            }
199        }
200    }
201
202    output
203}
204
205// ─────────────────────────────────────────────────────────────────────────────
206// depth_normals
207// ─────────────────────────────────────────────────────────────────────────────
208
209/// Compute surface normals from a depth image using finite differences.
210///
211/// For each pixel the normal is computed as the cross product of the
212/// forward horizontal and vertical depth gradients (in camera space),
213/// then normalised.  Invalid pixels (depth == 0) yield a zero normal.
214///
215/// # Returns
216/// An `(H, W, 3)` array where the last axis contains the (nx, ny, nz)
217/// components in camera space.
218pub fn depth_normals(depth: &Array2<f32>, fx: f64, fy: f64) -> Array3<f64> {
219    let (h, w) = depth.dim();
220    let mut normals = Array3::zeros((h, w, 3));
221
222    for row in 0..h {
223        for col in 0..w {
224            let z = depth[[row, col]];
225            if z == 0.0 {
226                continue;
227            }
228            // Horizontal gradient (finite difference along x-axis).
229            let (z_r, valid_r) = if col + 1 < w && depth[[row, col + 1]] > 0.0 {
230                (depth[[row, col + 1]], true)
231            } else if col > 0 && depth[[row, col - 1]] > 0.0 {
232                (depth[[row, col - 1]], true)
233            } else {
234                (z, false)
235            };
236            // Vertical gradient (finite difference along y-axis).
237            let (z_d, valid_d) = if row + 1 < h && depth[[row + 1, col]] > 0.0 {
238                (depth[[row + 1, col]], true)
239            } else if row > 0 && depth[[row - 1, col]] > 0.0 {
240                (depth[[row - 1, col]], true)
241            } else {
242                (z, false)
243            };
244
245            if !valid_r || !valid_d {
246                continue;
247            }
248
249            // 3-D positions of the three vertices.
250            let to_xyz = |r: usize, c: usize, dv: f32| -> [f64; 3] {
251                let dv64 = dv as f64;
252                [
253                    (c as f64 - (w as f64 / 2.0)) * dv64 / fx,
254                    (r as f64 - (h as f64 / 2.0)) * dv64 / fy,
255                    dv64,
256                ]
257            };
258
259            let p0 = to_xyz(row, col, z);
260            let p_r = to_xyz(row, col + 1, z_r);
261            let p_d = to_xyz(row + 1, col, z_d);
262
263            // Tangent vectors.
264            let tx = [p_r[0] - p0[0], p_r[1] - p0[1], p_r[2] - p0[2]];
265            let ty = [p_d[0] - p0[0], p_d[1] - p0[1], p_d[2] - p0[2]];
266
267            // Cross product.
268            let nx = ty[1] * tx[2] - ty[2] * tx[1];
269            let ny = ty[2] * tx[0] - ty[0] * tx[2];
270            let nz = ty[0] * tx[1] - ty[1] * tx[0];
271            let len = (nx * nx + ny * ny + nz * nz).sqrt();
272
273            if len > 1e-12 {
274                normals[[row, col, 0]] = nx / len;
275                normals[[row, col, 1]] = ny / len;
276                normals[[row, col, 2]] = nz / len;
277            }
278        }
279    }
280
281    normals
282}
283
284// ─────────────────────────────────────────────────────────────────────────────
285// disparity_to_depth
286// ─────────────────────────────────────────────────────────────────────────────
287
288/// Convert a stereo disparity map to a depth map.
289///
290/// Uses the stereo camera equation:
291/// `depth = (focal_length × baseline) / disparity`
292///
293/// Zero-disparity pixels (invalid) produce zero depth.
294///
295/// # Arguments
296/// * `disparity`    – H×W disparity image in pixels.
297/// * `baseline`     – Stereo baseline in metres.
298/// * `focal_length` – Camera focal length in pixels.
299pub fn disparity_to_depth(
300    disparity: &Array2<f32>,
301    baseline: f64,
302    focal_length: f64,
303) -> Array2<f32> {
304    let (h, w) = disparity.dim();
305    let mut depth = Array2::zeros((h, w));
306
307    let fb = (focal_length * baseline) as f32;
308
309    for row in 0..h {
310        for col in 0..w {
311            let d = disparity[[row, col]];
312            if d > 0.0 {
313                depth[[row, col]] = fb / d;
314            }
315        }
316    }
317
318    depth
319}
320
321// ─────────────────────────────────────────────────────────────────────────────
322// Tests
323// ─────────────────────────────────────────────────────────────────────────────
324
325#[cfg(test)]
326mod tests {
327    use super::*;
328    use approx::assert_abs_diff_eq;
329    use scirs2_core::ndarray::Array2;
330
331    // ─── depth_to_pointcloud ───────────────────────────────────────────────
332
333    #[test]
334    fn test_depth_to_pointcloud_shape() {
335        // 4×4 depth image, all pixels at 1.0 m.
336        let depth = Array2::from_elem((4, 4), 1.0f32);
337        let pc = depth_to_pointcloud(&depth, 100.0, 100.0, 2.0, 2.0, 1.0);
338        // All 16 pixels should produce a point.
339        assert_eq!(pc.n_points(), 16);
340    }
341
342    #[test]
343    fn test_depth_to_pointcloud_invalid_zeros_excluded() {
344        let mut depth = Array2::zeros((4, 4));
345        depth[[1, 1]] = 2.0f32;
346        depth[[2, 2]] = 3.0f32;
347        let pc = depth_to_pointcloud(&depth, 100.0, 100.0, 2.0, 2.0, 1.0);
348        assert_eq!(pc.n_points(), 2);
349    }
350
351    #[test]
352    fn test_depth_to_pointcloud_center_pixel() {
353        // A single pixel at the principal point should project to (0, 0, z).
354        let mut depth = Array2::zeros((5, 5));
355        depth[[2, 2]] = 1.0f32;
356        let pc = depth_to_pointcloud(&depth, 100.0, 100.0, 2.0, 2.0, 1.0);
357        assert_eq!(pc.n_points(), 1);
358        // x = (2 - 2) * 1.0 / 100 = 0, y = (2 - 2) * 1.0 / 100 = 0, z = 1.0
359        assert_abs_diff_eq!(pc.points[[0, 0]], 0.0, epsilon = 1e-9);
360        assert_abs_diff_eq!(pc.points[[0, 1]], 0.0, epsilon = 1e-9);
361        assert_abs_diff_eq!(pc.points[[0, 2]], 1.0, epsilon = 1e-9);
362    }
363
364    // ─── fill_depth_holes ────────────────────────────────────────────────
365
366    #[test]
367    fn test_fill_depth_holes_small_hole() {
368        // 5×5 grid with one zero pixel surrounded by 1.0.
369        let mut depth = Array2::from_elem((5, 5), 1.0f32);
370        depth[[2, 2]] = 0.0;
371        let filled = fill_depth_holes(&depth, 10);
372        // The hole should now be filled.
373        assert!(filled[[2, 2]] > 0.0, "hole should be filled");
374        assert_abs_diff_eq!(filled[[2, 2]], 1.0, epsilon = 0.1);
375    }
376
377    #[test]
378    fn test_fill_depth_holes_large_hole_not_filled() {
379        // 10×10 grid of zeros – too large to fill.
380        let depth = Array2::zeros((10, 10));
381        let filled = fill_depth_holes(&depth, 5);
382        // No valid neighbours exist, so nothing can be filled.
383        assert!(filled.iter().all(|&v| v == 0.0));
384    }
385
386    // ─── bilateral_filter_depth ──────────────────────────────────────────
387
388    #[test]
389    fn test_bilateral_filter_preserves_valid_shape() {
390        let depth = Array2::from_elem((8, 8), 2.0f32);
391        let filtered = bilateral_filter_depth(&depth, 5, 0.5, 2.0);
392        assert_eq!(filtered.dim(), (8, 8));
393    }
394
395    #[test]
396    fn test_bilateral_filter_edge_preservation() {
397        // Image with sharp step: left half at 1.0, right half at 5.0.
398        let (h, w) = (10, 10);
399        let mut depth = Array2::zeros((h, w));
400        for r in 0..h {
401            for c in 0..w {
402                depth[[r, c]] = if c < w / 2 { 1.0 } else { 5.0 };
403            }
404        }
405        let filtered = bilateral_filter_depth(&depth, 5, 0.5, 2.0);
406        // Pixels far from the edge should be nearly unchanged.
407        assert_abs_diff_eq!(filtered[[5, 0]], 1.0, epsilon = 0.1);
408        assert_abs_diff_eq!(filtered[[5, 9]], 5.0, epsilon = 0.1);
409    }
410
411    // ─── depth_normals ────────────────────────────────────────────────────
412
413    #[test]
414    fn test_depth_normals_shape() {
415        let depth = Array2::from_elem((6, 6), 1.0f32);
416        let normals = depth_normals(&depth, 100.0, 100.0);
417        assert_eq!(normals.dim(), (6, 6, 3));
418    }
419
420    #[test]
421    fn test_depth_normals_flat_surface_points_forward() {
422        // Flat surface – normals should point roughly in the -z direction
423        // (camera looks along +z).  For a flat depth image the cross product
424        // of the tangent vectors from the projection model points in the
425        // +z direction (into the scene).
426        let depth = Array2::from_elem((5, 5), 1.0f32);
427        let normals = depth_normals(&depth, 100.0, 100.0);
428        // The z-component of an interior pixel should be dominant.
429        let nz = normals[[2, 2, 2]].abs();
430        assert!(
431            nz > 0.5,
432            "z-component should dominate for a flat surface, got {nz}"
433        );
434    }
435
436    // ─── disparity_to_depth ───────────────────────────────────────────────
437
438    #[test]
439    fn test_disparity_to_depth_values() {
440        let disp = Array2::from_elem((3, 3), 2.0f32);
441        let depth = disparity_to_depth(&disp, 0.1, 500.0);
442        // depth = 500 * 0.1 / 2 = 25.0
443        for r in 0..3 {
444            for c in 0..3 {
445                assert_abs_diff_eq!(depth[[r, c]], 25.0, epsilon = 1e-3);
446            }
447        }
448    }
449
450    #[test]
451    fn test_disparity_to_depth_zero_disparity() {
452        let mut disp = Array2::from_elem((3, 3), 2.0f32);
453        disp[[1, 1]] = 0.0;
454        let depth = disparity_to_depth(&disp, 0.1, 500.0);
455        assert_abs_diff_eq!(depth[[1, 1]], 0.0, epsilon = 1e-6);
456    }
457}