Skip to main content

cv_pinhole/
essential.rs

1use crate::NormalizedKeyPoint;
2use cv_core::nalgebra::{Matrix3, Rotation3, Vector3, SVD};
3use cv_core::sample_consensus::Model;
4use cv_core::{Bearing, CameraToCamera, FeatureMatch, Pose};
5use derive_more::{AsMut, AsRef, Deref, DerefMut, From, Into};
6use num_traits::Float;
7
/// This stores an essential matrix, which is satisfied by the following constraint:
///
/// transpose(x') * E * x = 0
///
/// Where `x'` and `x` are homogeneous normalized image coordinates. You can get a
/// homogeneous normalized image coordinate by appending `1.0` to a `NormalizedKeyPoint`.
///
/// The essential matrix embodies the epipolar constraint between two images. Given that light
/// travels in a perfectly straight line (it will not, but for short distances it mostly does)
/// and assuming a pinhole camera model, for any point on the camera sensor, the light source
/// for that point exists somewhere along a line extending out from the bearing (direction
/// of travel) of that point. For a normalized image coordinate, that bearing is `(x, y, 1.0)`.
/// That is because normalized image coordinates exist on a virtual plane (the sensor)
/// a distance `z = 1.0` from the optical center (the location of the focal point) where
/// the unit of distance is the focal length. In epipolar geometry, the point on the virtual
/// plane pointing towards the second camera is called an epipole. The line through the image
/// created by the projected points is called an epipolar line, and it extends from the epipole.
///
/// If you look at every point along a projection out of the camera, each one of those points would
/// project onto the epipolar line on the camera sensor of another image.
/// If you traced every point along the projection to where it would appear on the sensor
/// of the camera (projection of the 3d points into normalized image coordinates), then
/// the points would form the epipolar line. This means that you can draw epipolar lines
/// so long as the projection does not pass through the optical center of both cameras.
/// However, that situation is usually impossible, as one camera would be obscuring the feature
/// for the other camera.
///
/// The essential matrix makes it possible to create a vector that is perpendicular to all
/// bearings that are formed from the epipolar line on the second image's sensor. This is
/// done by computing `E * x`, where `x` is a homogeneous normalized image coordinate
/// from the first image. The transpose of the resulting vector then has a dot product
/// with the transpose of the second image coordinate `x'` which is equal to `0.0`.
/// This can be written as:
///
/// ```text
/// dot(transpose(E * x), x') = 0
/// ```
///
/// This can be re-written into the form given above:
///
/// ```text
/// transpose(x') * E * x = 0
/// ```
///
/// Where the first operation creates a perpendicular vector to the epipoles on the first image
/// and the second takes the dot product which should result in 0.
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, AsMut, AsRef, Deref, DerefMut, From, Into)]
pub struct EssentialMatrix(pub Matrix3<f64>);
56
57impl EssentialMatrix {
    /// Can be used to enforce the constraints of an essential matrix to fix it.
    ///
    /// This finds the closest essential matrix in frobenius form. This just means
    /// that the two singular values are averaged and the null singular value is
    /// forced to zero.
    ///
    /// Returns `None` if the SVD does not converge within `max_iterations` at
    /// threshold `epsilon`.
    pub fn recondition(self, epsilon: f64, max_iterations: usize) -> Option<Self> {
        let old_svd = self.try_svd(true, true, epsilon, max_iterations)?;
        // We need to sort the singular values in the SVD.
        // After sorting, `sources[dest]` is the index in `old_svd` of the
        // `dest`-th largest singular value (negation makes the sort descending).
        let mut sources = [0, 1, 2];
        sources.sort_unstable_by_key(|&ix| float_ord::FloatOrd(-old_svd.singular_values[ix]));
        let mut svd = old_svd;
        // Permute each singular value together with its matching column of U and
        // row of V* so that the decomposition remains consistent.
        for (dest, &source) in sources.iter().enumerate() {
            svd.singular_values[dest] = old_svd.singular_values[source];
            svd.u
                .as_mut()
                .unwrap()
                .column_mut(dest)
                .copy_from(&old_svd.u.as_ref().unwrap().column(source));
            svd.v_t
                .as_mut()
                .unwrap()
                .row_mut(dest)
                .copy_from(&old_svd.v_t.as_ref().unwrap().row(source));
        }
        // Now that the singular values are sorted, find the closest
        // essential matrix to E in frobenius form.
        // This consists of averaging the two non-zero singular values
        // and zeroing out the near-zero singular value.
        svd.singular_values[2] = 0.0;
        let new_singular = (svd.singular_values[0] + svd.singular_values[1]) / 2.0;
        svd.singular_values[0] = new_singular;
        svd.singular_values[1] = new_singular;
        // Cannot fail because we asked for both U and V* on decomp.
        let mat = svd.recompose().unwrap();
        Some(Self(mat))
    }
94
95    /// Returns two possible rotations for the essential matrix along with a translation
96    /// bearing of arbitrary length. The translation bearing is not yet in the correct
97    /// space and the inverse rotation (transpose) must be multiplied by the translation
98    /// bearing to make the translation bearing be post-rotation. The translation's length
99    /// is unknown and of unknown sign and must be solved for by using a prior.
100    ///
101    /// `epsilon` is the threshold by which the singular value decomposition is considered
102    /// complete. Making this smaller may improve the precision. It is recommended to
103    /// set this to no higher than `1e-6`.
104    ///
105    /// `max_iterations` is the maximum number of iterations that singular value decomposition
106    /// will run on this matrix. Use this in soft realtime systems to cap the execution time.
107    /// A `max_iterations` of `0` may execute indefinitely and is not recommended.
108    ///
109    /// ```
110    /// use cv_core::CameraToCamera;
111    /// use cv_core::nalgebra::{IsometryMatrix3, Rotation3, Vector3};
112    /// use cv_pinhole::EssentialMatrix;
113    /// let pose = CameraToCamera(IsometryMatrix3::from_parts(
114    ///     Vector3::new(-0.8, 0.4, 0.5).into(),
115    ///     Rotation3::from_euler_angles(0.2, 0.3, 0.4),
116    /// ));
117    /// // Get the possible poses for the essential matrix created from `pose`.
118    /// let (rot_a, rot_b, t) = EssentialMatrix::from(pose).possible_rotations_unscaled_translation(1e-6, 50).unwrap();
119    /// // Compute residual rotations.
120    /// let a_res = rot_a.rotation_to(&pose.0.rotation).angle();
121    /// let b_res = rot_b.rotation_to(&pose.0.rotation).angle();
122    /// let a_close = a_res < 1e-4;
123    /// let b_close = b_res < 1e-4;
124    /// // At least one rotation is correct.
125    /// assert!(a_close || b_close);
126    /// // The translation points in the same (or reverse) direction
127    /// let t_res = 1.0 - t.normalize().dot(&pose.0.translation.vector.normalize()).abs();
128    /// assert!(t_res < 1e-4);
129    /// ```
130    pub fn possible_rotations_unscaled_translation(
131        &self,
132        epsilon: f64,
133        max_iterations: usize,
134    ) -> Option<(Rotation3<f64>, Rotation3<f64>, Vector3<f64>)> {
135        let Self(essential) = *self;
136        let essential = essential;
137
138        // `W` from https://en.wikipedia.org/wiki/Essential_matrix#Finding_one_solution.
139        let w = Matrix3::new(0.0, -1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0);
140        // Transpose of `W` from https://en.wikipedia.org/wiki/Essential_matrix#Finding_one_solution.
141        let wt = w.transpose();
142
143        // Perform SVD.
144        let svd = SVD::try_new(essential, true, true, epsilon, max_iterations);
145        // Extract only the U and V matrix from the SVD.
146        let u_v_t = svd.map(|svd| {
147            if let SVD {
148                u: Some(u),
149                v_t: Some(v_t),
150                singular_values,
151            } = svd
152            {
153                // Sort the singular vectors in U and V*.
154                let mut sources: [usize; 3] = [0, 1, 2];
155                sources.sort_unstable_by_key(|&ix| float_ord::FloatOrd(-singular_values[ix]));
156                let mut sorted_u = Matrix3::zeros();
157                let mut sorted_v_t = Matrix3::zeros();
158                for (&ix, mut column) in sources.iter().zip(sorted_u.column_iter_mut()) {
159                    column.copy_from(&u.column(ix));
160                }
161                for (&ix, mut row) in sources.iter().zip(sorted_v_t.row_iter_mut()) {
162                    row.copy_from(&v_t.row(ix));
163                }
164                (sorted_u, sorted_v_t)
165            } else {
166                panic!("Didn't get U and V matrix in SVD");
167            }
168        });
169        // Force the determinants to be positive. This is done to ensure the
170        // handedness of the rotation matrix is correct.
171        let u_v_t = u_v_t.map(|(mut u, mut v_t)| {
172            // Last column of U is undetermined since d = (a a 0).
173            if u.determinant() < 0.0 {
174                for n in u.column_mut(2).iter_mut() {
175                    *n *= -1.0;
176                }
177            }
178            // Last row of Vt is undetermined since d = (a a 0).
179            if v_t.determinant() < 0.0 {
180                for n in v_t.row_mut(2).iter_mut() {
181                    *n *= -1.0;
182                }
183            }
184            // Return positive determinant U and V*.
185            (u, v_t)
186        });
187        // Compute the possible rotations and the bearing with no normalization.
188        u_v_t.map(|(u, v_t)| {
189            (
190                Rotation3::from_matrix_unchecked(u * w * v_t),
191                Rotation3::from_matrix_unchecked(u * wt * v_t),
192                u.column(2).into_owned(),
193            )
194        })
195    }
196
197    /// See [`EssentialMatrix::possible_rotations_unscaled_translation`].
198    ///
199    /// This returns only the two rotations that are possible.
200    ///
201    /// ```
202    /// use cv_core::CameraToCamera;
203    /// use cv_core::nalgebra::{IsometryMatrix3, Rotation3, Vector3};
204    /// use cv_pinhole::EssentialMatrix;
205    /// let pose = CameraToCamera(IsometryMatrix3::from_parts(
206    ///     Vector3::new(-0.8, 0.4, 0.5).into(),
207    ///     Rotation3::from_euler_angles(0.2, 0.3, 0.4),
208    /// ));
209    /// // Get the possible rotations for the essential matrix created from `pose`.
210    /// let rbs = EssentialMatrix::from(pose).possible_rotations(1e-6, 50).unwrap();
211    /// let one_correct = rbs.iter().any(|&rot| {
212    ///     let angle_residual = rot.rotation_to(&pose.0.rotation).angle();
213    ///     angle_residual < 1e-4
214    /// });
215    /// assert!(one_correct);
216    /// ```
217    pub fn possible_rotations(
218        &self,
219        epsilon: f64,
220        max_iterations: usize,
221    ) -> Option<[Rotation3<f64>; 2]> {
222        self.possible_rotations_unscaled_translation(epsilon, max_iterations)
223            .map(|(rot_a, rot_b, _)| [rot_a, rot_b])
224    }
225
226    /// See [`EssentialMatrix::possible_rotations_unscaled_translation`].
227    ///
228    /// This returns the rotations and their corresponding post-rotation translation bearing.
229    ///
230    /// ```
231    /// use cv_core::CameraToCamera;
232    /// use cv_core::nalgebra::{IsometryMatrix3, Rotation3, Vector3};
233    /// use cv_pinhole::EssentialMatrix;
234    /// let pose = CameraToCamera(IsometryMatrix3::from_parts(
235    ///     Vector3::new(-0.8, 0.4, 0.5).into(),
236    ///     Rotation3::from_euler_angles(0.2, 0.3, 0.4),
237    /// ));
238    /// // Get the possible poses for the essential matrix created from `pose`.
239    /// let rbs = EssentialMatrix::from(pose).possible_unscaled_poses(1e-6, 50).unwrap();
240    /// let one_correct = rbs.iter().any(|&upose| {
241    ///     let angle_residual =
242    ///         upose.0.rotation.rotation_to(&pose.0.rotation).angle();
243    ///     let translation_residual =
244    ///         1.0 - upose.0.translation.vector.normalize()
245    ///                    .dot(&pose.0.translation.vector.normalize());
246    ///     angle_residual < 1e-4 && translation_residual < 1e-4
247    /// });
248    /// assert!(one_correct);
249    /// ```
250    pub fn possible_unscaled_poses(
251        &self,
252        epsilon: f64,
253        max_iterations: usize,
254    ) -> Option<[CameraToCamera; 4]> {
255        self.possible_rotations_unscaled_translation(epsilon, max_iterations)
256            .map(|(rot_a, rot_b, t)| {
257                [
258                    CameraToCamera::from_parts(t, rot_a),
259                    CameraToCamera::from_parts(t, rot_b),
260                    CameraToCamera::from_parts(-t, rot_a),
261                    CameraToCamera::from_parts(-t, rot_b),
262                ]
263            })
264    }
265
266    /// Same as [`EssentialMatrix::possible_unscaled_poses`], but it doesn't return
267    /// 4 unscaled poses since it doesn't bother to give back the different translation
268    /// directions and instead only gives one. This is useful if your algorithm doesn't
269    /// care about the direction of translation.
270    pub fn possible_unscaled_poses_bearing(
271        &self,
272        epsilon: f64,
273        max_iterations: usize,
274    ) -> Option<[CameraToCamera; 2]> {
275        self.possible_rotations_unscaled_translation(epsilon, max_iterations)
276            .map(|(rot_a, rot_b, t)| {
277                [
278                    CameraToCamera::from_parts(t, rot_a),
279                    CameraToCamera::from_parts(t, rot_b),
280                ]
281            })
282    }
283
284    /// See [`PoseSolver`].
285    ///
286    /// Creates a solver that allows you to solve for the pose using correspondences.
287    /// The pose may be scaled or unscaled, and if the `alloc` feature is enabled, you
288    /// can retrieve the inliers as well.
289    pub fn pose_solver(&self) -> PoseSolver<'_> {
290        PoseSolver {
291            essential: self,
292            epsilon: 1e-9,
293            max_iterations: 100,
294            consensus_ratio: 0.5,
295        }
296    }
297}
298
299/// Generates an essential matrix corresponding to this relative camera pose.
300///
301/// If a point `a` is transformed using [`Pose::transform`] into
302/// a point `b`, then the essential matrix returned by this method will
303/// give a residual of approximately `0.0` when you call
304/// `essential.residual(&FeatureMatch(a, b))`.
305///
306/// See the documentation of [`EssentialMatrix`] for more information.
307impl From<CameraToCamera> for EssentialMatrix {
308    fn from(pose: CameraToCamera) -> Self {
309        Self(pose.0.translation.vector.cross_matrix() * *pose.0.rotation.matrix())
310    }
311}
312
313impl Model<FeatureMatch<NormalizedKeyPoint>> for EssentialMatrix {
314    fn residual(&self, data: &FeatureMatch<NormalizedKeyPoint>) -> f64 {
315        let Self(mat) = *self;
316        let FeatureMatch(a, b) = data;
317        let normalized = |p: &NormalizedKeyPoint| {
318            let p = p.bearing_unnormalized();
319            p / p.z
320        };
321
322        // The result is a 1x1 matrix which we must get element 0 from.
323        Float::abs((normalized(b).transpose() * mat * normalized(a))[0])
324    }
325}
326
/// Allows pose solving to be parameterized if defaults are not sufficient.
#[derive(Copy, Clone, Debug)]
pub struct PoseSolver<'a> {
    /// The essential matrix whose decomposition yields the candidate poses.
    essential: &'a EssentialMatrix,
    /// Convergence threshold for the SVD used in pose decomposition.
    epsilon: f64,
    /// Cap on SVD iterations; bounds execution time for soft realtime use.
    max_iterations: usize,
    /// Fraction of correspondences that must pass the chirality check
    /// for a candidate pose to be accepted.
    consensus_ratio: f64,
}
335
336impl<'a> PoseSolver<'a> {
337    /// Set the epsilon to be used in SVD.
338    pub fn epsilon(self, epsilon: f64) -> Self {
339        Self { epsilon, ..self }
340    }
341
342    /// Set the max number of iterations to be used in SVD.
343    pub fn max_iterations(self, max_iterations: usize) -> Self {
344        Self {
345            max_iterations,
346            ..self
347        }
348    }
349
350    /// Set the level of agreement required for the pose to be accepted.
351    pub fn consensus_ratio(self, consensus_ratio: f64) -> Self {
352        Self {
353            consensus_ratio,
354            ..self
355        }
356    }
357
358    /// Return the [`CameraToCamera`] that transforms a [`CameraPoint`](crate::CameraPoint) of image
359    /// A (source of `a`) to the corresponding [`CameraPoint`](crate::CameraPoint) of image B (source of `b`).
360    /// The function takes an iterator over [`FeatureMatch`] from A to B.
361    /// The translation scale is unknown of the returned pose.
362    ///
363    /// * `depth` - The actual depth (`z` axis, not distance) of normalized keypoint `a`
364    /// * `a` - A keypoint from image `A`
365    /// * `b` - A keypoint from image `B`
366    ///
367    /// `self` must satisfy the constraint:
368    ///
369    /// ```text
370    /// transpose(homogeneous(a)) * E * homogeneous(b) = 0
371    /// ```
372    ///
373    /// Also, `a` and `b` must be a correspondence.
374    ///
375    /// This will take the average translation over the entire iterator. This is done
376    /// to smooth out noise and outliers (if present).
377    ///
378    /// `epsilon` is a small value to which SVD must converge to before terminating.
379    ///
380    /// `max_iterations` is the maximum number of iterations that SVD will run on this
381    /// matrix. Use this to cap the execution time.
382    /// A `max_iterations` of `0` may execute indefinitely and is not recommended except
383    /// for non-production code.
384    ///
385    /// `consensus_ratio` is the ratio of points which must be in front of the camera for the model
386    /// to be accepted and return Some. Otherwise, None is returned. Set this to about
387    /// `0.45` to have approximate majority consensus.
388    ///
389    /// `bearing_scale` is a function that is passed a translation bearing vector,
390    /// an untranslated (but rotated) camera point, and a normalized key point
391    /// where the actual point exists. It must return the scalar which the
392    /// translation bearing vector must by multiplied by to get the actual translation.
393    /// It may return `None` if it fails.
394    ///
395    /// `correspondences` must provide an iterator of tuples containing the matches
396    /// of a 3d `CameraPoint` `a` from camera A and the matching `NormalizedKeyPoint`
397    /// `b` from camera B.
398    ///
399    /// This does not communicate which points were outliers to each model.
400    pub fn solve_unscaled(
401        &self,
402        correspondences: impl Iterator<Item = FeatureMatch<NormalizedKeyPoint>>,
403    ) -> Option<CameraToCamera> {
404        // Get the possible rotations and the translation
405        self.essential
406            .possible_unscaled_poses(self.epsilon, self.max_iterations)
407            .and_then(|poses| {
408                // Get the net translation scale of points that agree with a and b
409                // in addition to the number of points that agree with a and b.
410                let (ts, total) = correspondences.fold(
411                    ([0usize; 4], 0usize),
412                    |(mut ts, total), FeatureMatch(a, b)| {
413                        let trans_and_agree = |pose: CameraToCamera| {
414                            // Put the second camera position back into the first camera's frame of reference.
415                            let p = -(pose.0.rotation.inverse() * pose.0.translation.vector);
416                            let a = a.virtual_image_point().coords;
417                            // Transform the bearing B back into camera A's space (its a vector, so only rotation is applied).
418                            let b = pose.0.rotation.inverse() * b.virtual_image_point().coords;
419                            let a_squared = a.norm_squared();
420                            let b_squared = b.norm_squared();
421                            let a_b = a.dot(&b);
422                            let a_pos = a.dot(&p);
423                            let b_pos = b.dot(&p);
424
425                            // Check chirality constraint.
426                            b_squared * a_pos - a_b * b_pos > 0.0
427                                && a_b * a_pos - a_squared * b_pos > 0.0
428                        };
429
430                        // Do it for all poses.
431                        for (tn, &pose) in ts.iter_mut().zip(&poses) {
432                            if trans_and_agree(pose) {
433                                *tn += 1;
434                            }
435                        }
436
437                        (ts, total + 1)
438                    },
439                );
440
441                // Ensure that there is at least one point.
442                if total == 0 {
443                    return None;
444                }
445
446                // Ensure that the best one exceeds the consensus ratio.
447                let (ix, best) = ts
448                    .iter()
449                    .copied()
450                    .enumerate()
451                    .max_by_key(|&(_, t)| t)
452                    .unwrap();
453                if (best as f64) < self.consensus_ratio * total as f64 && best != 0 {
454                    return None;
455                }
456
457                Some(poses[ix])
458            })
459    }
460
461    /// Same as [`PoseSolver::solve_unscaled`], but also communicates the inliers.
462    ///
463    /// The `alloc` feature must be enabled to use this method.
464    #[cfg(feature = "alloc")]
465    pub fn solve_unscaled_inliers(
466        &self,
467        correspondences: impl Iterator<Item = FeatureMatch<NormalizedKeyPoint>>,
468    ) -> Option<(CameraToCamera, alloc::vec::Vec<usize>)> {
469        // Get the possible rotations and the translation
470        self.essential
471            .possible_unscaled_poses(self.epsilon, self.max_iterations)
472            .and_then(|poses| {
473                // Get the net translation scale of points that agree with a and b
474                // in addition to the number of points that agree with a and b.
475                let (mut ts, total) = correspondences.enumerate().fold(
476                    (
477                        [
478                            (0usize, alloc::vec::Vec::new()),
479                            (0usize, alloc::vec::Vec::new()),
480                            (0usize, alloc::vec::Vec::new()),
481                            (0usize, alloc::vec::Vec::new()),
482                        ],
483                        0usize,
484                    ),
485                    |(mut ts, total), (ix, FeatureMatch(a, b))| {
486                        let trans_and_agree = |pose: CameraToCamera| {
487                            // Put the second camera position back into the first camera's frame of reference.
488                            let p = -(pose.0.rotation.inverse() * pose.0.translation.vector);
489                            let a = a.virtual_image_point().coords;
490                            // Transform the bearing B back into camera A's space (its a vector, so only rotation is applied).
491                            let b = pose.0.rotation.inverse() * b.virtual_image_point().coords;
492                            let a_squared = a.norm_squared();
493                            let b_squared = b.norm_squared();
494                            let a_b = a.dot(&b);
495                            let a_pos = a.dot(&p);
496                            let b_pos = b.dot(&p);
497
498                            // Check chirality constraint.
499                            b_squared * a_pos - a_b * b_pos > 0.0
500                                && a_b * a_pos - a_squared * b_pos > 0.0
501                        };
502
503                        // Do it for all poses.
504                        for ((tn, ti), &pose) in ts.iter_mut().zip(&poses) {
505                            if trans_and_agree(pose) {
506                                *tn += 1;
507                                ti.push(ix);
508                            }
509                        }
510
511                        (ts, total + 1)
512                    },
513                );
514
515                // Ensure that there is at least one point.
516                if total == 0 {
517                    return None;
518                }
519
520                // Ensure that the best one exceeds the consensus ratio.
521                let (ix, best) = ts
522                    .iter()
523                    .map(|&(tn, _)| tn)
524                    .enumerate()
525                    .max_by_key(|&(_, t)| t)
526                    .unwrap();
527                if (best as f64) < self.consensus_ratio * total as f64 && best != 0 {
528                    return None;
529                }
530
531                let inliers = core::mem::take(&mut ts[ix].1);
532
533                Some((poses[ix], inliers))
534            })
535    }
536}