Skip to main content

scirs2_vision/
vision_3d.rs

1//! 3D Vision module - Stereo vision, depth estimation, and structure from motion
2//!
3//! This module provides capabilities for 3D reconstruction, stereo matching,
4//! depth estimation, and structure from motion.
5
6use crate::error::{Result, VisionError};
7use image::{DynamicImage, GrayImage, Rgb, RgbImage};
8use scirs2_core::ndarray::{Array2, Array3};
9use std::collections::HashMap;
10
11/// Stereo matching algorithm
12#[derive(Debug, Clone, Copy, PartialEq)]
13pub enum StereoAlgorithm {
14    /// Semi-Global Block Matching
15    SGBM,
16    /// Block Matching
17    BM,
18    /// Graph Cut stereo
19    GraphCut,
20}
21
22/// Stereo matching parameters for SGBM
23#[derive(Debug, Clone)]
24pub struct SGBMParams {
25    /// Minimum disparity
26    pub min_disparity: i32,
27    /// Number of disparities (must be divisible by 16)
28    pub num_disparities: i32,
29    /// Block size (odd number >= 1)
30    pub block_size: usize,
31    /// P1 parameter for smoothness
32    pub p1: i32,
33    /// P2 parameter for smoothness
34    pub p2: i32,
35    /// Maximum allowed difference
36    pub disp_12_max_diff: i32,
37    /// Pre-filter cap
38    pub pre_filter_cap: i32,
39    /// Uniqueness ratio (0-100)
40    pub uniqueness_ratio: i32,
41    /// Speckle window size
42    pub speckle_window_size: usize,
43    /// Speckle range
44    pub speckle_range: i32,
45}
46
47impl Default for SGBMParams {
48    fn default() -> Self {
49        Self {
50            min_disparity: 0,
51            num_disparities: 64,
52            block_size: 5,
53            p1: 8 * 3 * 5 * 5,  // 8 * number of channels * block_size^2
54            p2: 32 * 3 * 5 * 5, // 32 * number of channels * block_size^2
55            disp_12_max_diff: 1,
56            pre_filter_cap: 63,
57            uniqueness_ratio: 10,
58            speckle_window_size: 100,
59            speckle_range: 32,
60        }
61    }
62}
63
64/// Block Matching parameters
65#[derive(Debug, Clone)]
66pub struct BMParams {
67    /// Minimum disparity
68    pub min_disparity: i32,
69    /// Number of disparities
70    pub num_disparities: i32,
71    /// Block size
72    pub block_size: usize,
73}
74
75impl Default for BMParams {
76    fn default() -> Self {
77        Self {
78            min_disparity: 0,
79            num_disparities: 64,
80            block_size: 15,
81        }
82    }
83}
84
85/// Disparity map result
86#[derive(Debug, Clone)]
87pub struct DisparityMap {
88    /// Disparity values (height × width)
89    pub disparity: Array2<f32>,
90    /// Confidence map
91    pub confidence: Array2<f32>,
92}
93
94impl DisparityMap {
95    /// Convert disparity to depth map
96    ///
97    /// # Arguments
98    ///
99    /// * `focal_length` - Camera focal length in pixels
100    /// * `baseline` - Stereo baseline in meters
101    ///
102    /// # Returns
103    ///
104    /// * Depth map in meters
105    pub fn to_depth_map(&self, focal_length: f32, baseline: f32) -> Array2<f32> {
106        let (height, width) = self.disparity.dim();
107        let mut depth = Array2::zeros((height, width));
108
109        for y in 0..height {
110            for x in 0..width {
111                let disp = self.disparity[[y, x]];
112                if disp > 0.0 {
113                    depth[[y, x]] = (focal_length * baseline) / disp;
114                }
115            }
116        }
117
118        depth
119    }
120
121    /// Visualize disparity map as color image
122    pub fn visualize(&self) -> Result<RgbImage> {
123        let (height, width) = self.disparity.dim();
124        let mut img = RgbImage::new(width as u32, height as u32);
125
126        // Find min and max disparity for normalization
127        let mut min_disp = f32::INFINITY;
128        let mut max_disp = f32::NEG_INFINITY;
129
130        for y in 0..height {
131            for x in 0..width {
132                let disp = self.disparity[[y, x]];
133                if disp > 0.0 {
134                    min_disp = min_disp.min(disp);
135                    max_disp = max_disp.max(disp);
136                }
137            }
138        }
139
140        let range = max_disp - min_disp;
141        if range == 0.0 {
142            return Ok(img);
143        }
144
145        for y in 0..height {
146            for x in 0..width {
147                let disp = self.disparity[[y, x]];
148                if disp > 0.0 {
149                    let normalized = ((disp - min_disp) / range * 255.0) as u8;
150                    img.put_pixel(
151                        x as u32,
152                        y as u32,
153                        Rgb([normalized, normalized, normalized]),
154                    );
155                } else {
156                    img.put_pixel(x as u32, y as u32, Rgb([0, 0, 0]));
157                }
158            }
159        }
160
161        Ok(img)
162    }
163}
164
165/// Stereo matching using Semi-Global Block Matching (SGBM)
166///
167/// # Arguments
168///
169/// * `left` - Left stereo image
170/// * `right` - Right stereo image
171/// * `params` - SGBM parameters
172///
173/// # Returns
174///
175/// * Disparity map
176pub fn stereo_sgbm(
177    left: &DynamicImage,
178    right: &DynamicImage,
179    params: &SGBMParams,
180) -> Result<DisparityMap> {
181    let left_gray = left.to_luma8();
182    let right_gray = right.to_luma8();
183
184    if left_gray.dimensions() != right_gray.dimensions() {
185        return Err(VisionError::InvalidParameter(
186            "Left and right images must have same dimensions".to_string(),
187        ));
188    }
189
190    let (width, height) = left_gray.dimensions();
191    let mut disparity = Array2::zeros((height as usize, width as usize));
192    let mut confidence = Array2::zeros((height as usize, width as usize));
193
194    // Semi-global matching algorithm
195    for y in (params.block_size / 2)..(height as usize - params.block_size / 2) {
196        for x in (params.block_size / 2)..(width as usize - params.block_size / 2) {
197            let mut best_disp = 0;
198            let mut best_cost = f32::INFINITY;
199
200            // Search for best disparity
201            for d in params.min_disparity..(params.min_disparity + params.num_disparities) {
202                let match_x = x as i32 - d;
203                if match_x < (params.block_size / 2) as i32 {
204                    continue;
205                }
206                if match_x >= (width as i32 - params.block_size as i32 / 2) {
207                    continue;
208                }
209
210                // Compute matching cost using SAD (Sum of Absolute Differences)
211                let mut sad = 0.0f32;
212                let radius = params.block_size / 2;
213
214                for dy in 0..params.block_size {
215                    for dx in 0..params.block_size {
216                        let ly = y + dy - radius;
217                        let lx = x + dx - radius;
218                        let rx = match_x as usize + dx - radius;
219
220                        let left_val = left_gray.get_pixel(lx as u32, ly as u32)[0] as f32;
221                        let right_val = right_gray.get_pixel(rx as u32, ly as u32)[0] as f32;
222
223                        sad += (left_val - right_val).abs();
224                    }
225                }
226
227                // Apply smoothness penalty
228                let smoothness_cost = if d > 0 {
229                    let prev_cost = if x > 0 { disparity[[y, x - 1]] } else { 0.0 };
230                    let diff = (d as f32 - prev_cost).abs();
231                    if diff == 1.0 {
232                        params.p1 as f32
233                    } else if diff > 1.0 {
234                        params.p2 as f32
235                    } else {
236                        0.0
237                    }
238                } else {
239                    0.0
240                };
241
242                let total_cost = sad + smoothness_cost;
243
244                if total_cost < best_cost {
245                    best_cost = total_cost;
246                    best_disp = d;
247                }
248            }
249
250            disparity[[y, x]] = best_disp as f32;
251            confidence[[y, x]] = 1.0 / (1.0 + best_cost);
252        }
253    }
254
255    // Post-processing: remove speckles
256    if params.speckle_window_size > 0 {
257        remove_speckles(
258            &mut disparity,
259            params.speckle_window_size,
260            params.speckle_range,
261        )?;
262    }
263
264    Ok(DisparityMap {
265        disparity,
266        confidence,
267    })
268}
269
270/// Stereo matching using Block Matching (BM)
271///
272/// # Arguments
273///
274/// * `left` - Left stereo image
275/// * `right` - Right stereo image
276/// * `params` - BM parameters
277///
278/// # Returns
279///
280/// * Disparity map
281pub fn stereo_bm(
282    left: &DynamicImage,
283    right: &DynamicImage,
284    params: &BMParams,
285) -> Result<DisparityMap> {
286    let left_gray = left.to_luma8();
287    let right_gray = right.to_luma8();
288
289    if left_gray.dimensions() != right_gray.dimensions() {
290        return Err(VisionError::InvalidParameter(
291            "Left and right images must have same dimensions".to_string(),
292        ));
293    }
294
295    let (width, height) = left_gray.dimensions();
296    let mut disparity = Array2::zeros((height as usize, width as usize));
297    let mut confidence = Array2::zeros((height as usize, width as usize));
298
299    let radius = params.block_size / 2;
300
301    for y in radius..(height as usize - radius) {
302        for x in radius..(width as usize - radius) {
303            let mut best_disp = 0;
304            let mut best_cost = f32::INFINITY;
305
306            // Search for best disparity
307            for d in params.min_disparity..(params.min_disparity + params.num_disparities) {
308                let match_x = x as i32 - d;
309                if match_x < radius as i32 || match_x >= (width as i32 - radius as i32) {
310                    continue;
311                }
312
313                // Compute SAD
314                let mut sad = 0.0f32;
315
316                for dy in 0..params.block_size {
317                    for dx in 0..params.block_size {
318                        let ly = y + dy - radius;
319                        let lx = x + dx - radius;
320                        let rx = match_x as usize + dx - radius;
321
322                        let left_val = left_gray.get_pixel(lx as u32, ly as u32)[0] as f32;
323                        let right_val = right_gray.get_pixel(rx as u32, ly as u32)[0] as f32;
324
325                        sad += (left_val - right_val).abs();
326                    }
327                }
328
329                if sad < best_cost {
330                    best_cost = sad;
331                    best_disp = d;
332                }
333            }
334
335            disparity[[y, x]] = best_disp as f32;
336            confidence[[y, x]] =
337                1.0 / (1.0 + best_cost / (params.block_size * params.block_size) as f32);
338        }
339    }
340
341    Ok(DisparityMap {
342        disparity,
343        confidence,
344    })
345}
346
347/// Remove speckles from disparity map
348fn remove_speckles(disparity: &mut Array2<f32>, window_size: usize, max_diff: i32) -> Result<()> {
349    let (height, width) = disparity.dim();
350    let mut labels = Array2::zeros((height, width));
351    let mut label_id = 1;
352
353    // Connected component labeling
354    for y in 0..height {
355        for x in 0..width {
356            if disparity[[y, x]] > 0.0 && labels[[y, x]] == 0 {
357                // Flood fill
358                let mut stack = vec![(y, x)];
359                let mut region_size = 0;
360                let base_disp = disparity[[y, x]];
361
362                while let Some((cy, cx)) = stack.pop() {
363                    if labels[[cy, cx]] > 0 {
364                        continue;
365                    }
366
367                    let disp = disparity[[cy, cx]];
368                    if (disp - base_disp).abs() > max_diff as f32 {
369                        continue;
370                    }
371
372                    labels[[cy, cx]] = label_id;
373                    region_size += 1;
374
375                    // Add neighbors
376                    for (dy, dx) in &[(-1, 0), (1, 0), (0, -1), (0, 1)] {
377                        let ny = cy as i32 + dy;
378                        let nx = cx as i32 + dx;
379
380                        if ny >= 0 && ny < height as i32 && nx >= 0 && nx < width as i32 {
381                            let ny = ny as usize;
382                            let nx = nx as usize;
383                            if disparity[[ny, nx]] > 0.0 && labels[[ny, nx]] == 0 {
384                                stack.push((ny, nx));
385                            }
386                        }
387                    }
388                }
389
390                // Remove small regions
391                if region_size < window_size {
392                    for y in 0..height {
393                        for x in 0..width {
394                            if labels[[y, x]] == label_id {
395                                disparity[[y, x]] = 0.0;
396                            }
397                        }
398                    }
399                }
400
401                label_id += 1;
402            }
403        }
404    }
405
406    Ok(())
407}
408
409/// 3D point cloud
410#[derive(Debug, Clone)]
411pub struct PointCloud {
412    /// 3D points (N × 3: x, y, z)
413    pub points: Array2<f32>,
414    /// RGB colors (N × 3: r, g, b)
415    pub colors: Option<Array2<u8>>,
416}
417
418/// Triangulate 3D points from stereo disparity
419///
420/// # Arguments
421///
422/// * `disparity` - Disparity map
423/// * `focal_length` - Camera focal length in pixels
424/// * `baseline` - Stereo baseline in meters
425/// * `cx` - Principal point x-coordinate
426/// * `cy` - Principal point y-coordinate
427///
428/// # Returns
429///
430/// * 3D point cloud
431pub fn triangulate_points(
432    disparity: &DisparityMap,
433    focal_length: f32,
434    baseline: f32,
435    cx: f32,
436    cy: f32,
437) -> Result<PointCloud> {
438    let (height, width) = disparity.disparity.dim();
439    let mut points_vec = Vec::new();
440
441    for y in 0..height {
442        for x in 0..width {
443            let d = disparity.disparity[[y, x]];
444            if d > 0.0 {
445                // Compute 3D coordinates
446                let z = (focal_length * baseline) / d;
447                let x3d = ((x as f32 - cx) * z) / focal_length;
448                let y3d = ((y as f32 - cy) * z) / focal_length;
449
450                points_vec.push([x3d, y3d, z]);
451            }
452        }
453    }
454
455    let num_points = points_vec.len();
456    let mut points = Array2::zeros((num_points, 3));
457
458    for (i, p) in points_vec.iter().enumerate() {
459        points[[i, 0]] = p[0];
460        points[[i, 1]] = p[1];
461        points[[i, 2]] = p[2];
462    }
463
464    Ok(PointCloud {
465        points,
466        colors: None,
467    })
468}
469
470/// Structure from Motion (SfM) result
471#[derive(Debug, Clone)]
472pub struct SfMResult {
473    /// 3D point cloud
474    pub point_cloud: PointCloud,
475    /// Camera poses (N × 12: 3×4 projection matrices)
476    pub camera_poses: Vec<Array2<f32>>,
477    /// Feature correspondences
478    pub correspondences: HashMap<usize, Vec<(usize, f32, f32)>>,
479}
480
481/// Perform structure from motion reconstruction
482///
483/// # Arguments
484///
485/// * `images` - Sequence of images
486/// * `focal_length` - Camera focal length
487///
488/// # Returns
489///
490/// * SfM reconstruction result
491pub fn structure_from_motion(images: &[DynamicImage], focal_length: f32) -> Result<SfMResult> {
492    if images.len() < 2 {
493        return Err(VisionError::InvalidParameter(
494            "Need at least 2 images for SfM".to_string(),
495        ));
496    }
497
498    // Placeholder implementation
499    // In a real implementation, this would:
500    // 1. Extract features from all images
501    // 2. Match features across images
502    // 3. Estimate camera poses
503    // 4. Triangulate 3D points
504    // 5. Bundle adjustment
505
506    let point_cloud = PointCloud {
507        points: Array2::zeros((0, 3)),
508        colors: None,
509    };
510
511    let camera_poses = Vec::new();
512    let correspondences = HashMap::new();
513
514    Ok(SfMResult {
515        point_cloud,
516        camera_poses,
517        correspondences,
518    })
519}
520
521#[cfg(test)]
522mod tests {
523    use super::*;
524    use image::RgbImage;
525
526    #[test]
527    fn test_sgbm_params_default() {
528        let params = SGBMParams::default();
529        assert_eq!(params.block_size, 5);
530        assert_eq!(params.num_disparities, 64);
531    }
532
533    #[test]
534    fn test_bm_params_default() {
535        let params = BMParams::default();
536        assert_eq!(params.block_size, 15);
537    }
538
539    #[test]
540    fn test_stereo_bm() {
541        let left = DynamicImage::ImageRgb8(RgbImage::new(64, 64));
542        let right = DynamicImage::ImageRgb8(RgbImage::new(64, 64));
543
544        let result = stereo_bm(&left, &right, &BMParams::default());
545        assert!(result.is_ok());
546    }
547
548    #[test]
549    fn test_stereo_sgbm() {
550        let left = DynamicImage::ImageRgb8(RgbImage::new(64, 64));
551        let right = DynamicImage::ImageRgb8(RgbImage::new(64, 64));
552
553        let result = stereo_sgbm(&left, &right, &SGBMParams::default());
554        assert!(result.is_ok());
555    }
556
557    #[test]
558    fn test_disparity_to_depth() {
559        let disparity = Array2::from_elem((10, 10), 1.0);
560        let confidence = Array2::from_elem((10, 10), 1.0);
561        let disp_map = DisparityMap {
562            disparity,
563            confidence,
564        };
565
566        let depth = disp_map.to_depth_map(500.0, 0.1);
567        assert_eq!(depth.dim(), (10, 10));
568    }
569
570    #[test]
571    fn test_disparity_visualize() {
572        let disparity = Array2::from_elem((10, 10), 1.0);
573        let confidence = Array2::from_elem((10, 10), 1.0);
574        let disp_map = DisparityMap {
575            disparity,
576            confidence,
577        };
578
579        let vis = disp_map.visualize();
580        assert!(vis.is_ok());
581    }
582
583    #[test]
584    fn test_triangulate_points() {
585        let disparity = Array2::from_elem((10, 10), 1.0);
586        let confidence = Array2::from_elem((10, 10), 1.0);
587        let disp_map = DisparityMap {
588            disparity,
589            confidence,
590        };
591
592        let cloud = triangulate_points(&disp_map, 500.0, 0.1, 320.0, 240.0);
593        assert!(cloud.is_ok());
594    }
595
596    #[test]
597    fn test_sfm_insufficient_images() {
598        let images = vec![DynamicImage::ImageRgb8(RgbImage::new(64, 64))];
599        let result = structure_from_motion(&images, 500.0);
600        assert!(result.is_err());
601    }
602}