// cv_pinhole/essential.rs
1use crate::NormalizedKeyPoint;
2use cv_core::nalgebra::{Matrix3, Rotation3, Vector3, SVD};
3use cv_core::sample_consensus::Model;
4use cv_core::{Bearing, CameraToCamera, FeatureMatch, Pose};
5use derive_more::{AsMut, AsRef, Deref, DerefMut, From, Into};
6use num_traits::Float;
7
/// This stores an essential matrix, which is satisfied by the following constraint:
///
/// transpose(x') * E * x = 0
///
/// Where `x'` and `x` are homogeneous normalized image coordinates. You can get a
/// homogeneous normalized image coordinate by appending `1.0` to a `NormalizedKeyPoint`.
///
/// The essential matrix embodies the epipolar constraint between two images. Given that light
/// travels in a perfectly straight line (it will not, but for short distances it mostly does)
/// and assuming a pinhole camera model, for any point on the camera sensor, the light source
/// for that point exists somewhere along a line extending out from the bearing (direction
/// of travel) of that point. For a normalized image coordinate, that bearing is `(x, y, 1.0)`.
/// That is because normalized image coordinates exist on a virtual plane (the sensor)
/// a distance `z = 1.0` from the optical center (the location of the focal point) where
/// the unit of distance is the focal length. In epipolar geometry, the point on the virtual
/// plane pointing towards the second camera is called an epipole. The line through the image
/// created by the projected points is called an epipolar line, and it extends from the epipole.
///
/// If you look at every point along a projection out of the camera, each one of those points would
/// project onto the epipolar line on the camera sensor of another image.
/// If you traced every point along the projection to where it would appear on the sensor
/// of the camera (projection of the 3d points into normalized image coordinates), then
/// the points would form the epipolar line. This means that you can draw epipolar lines
/// so long as the projection does not pass through the optical center of both cameras.
/// However, that situation is usually impossible, as one camera would be obscuring the feature
/// for the other camera.
///
/// The essential matrix makes it possible to create a vector that is perpendicular to all
/// bearings that are formed from the epipolar line on the second image's sensor. This is
/// done by computing `E * x`, where `x` is a homogeneous normalized image coordinate
/// from the first image. The transpose of the resulting vector then has a dot product
/// with the transpose of the second image coordinate `x'` which is equal to `0.0`.
/// This can be written as:
///
/// ```text
/// dot(transpose(E * x), x') = 0
/// ```
///
/// This can be re-written into the form given above:
///
/// ```text
/// transpose(x') * E * x = 0
/// ```
///
/// Where the first operation creates a perpendicular vector to the epipoles on the first image
/// and the second takes the dot product which should result in 0.
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, AsMut, AsRef, Deref, DerefMut, From, Into)]
pub struct EssentialMatrix(pub Matrix3<f64>);
56
57impl EssentialMatrix {
58 /// Can be used to enforce the constraints of an essential matrix to fix it.
59 ///
60 /// This finds the closest essential matrix in frobenius form. This just means
61 /// that the two singular values are averaged and the null singular value is
62 /// forced to zero.
63 pub fn recondition(self, epsilon: f64, max_iterations: usize) -> Option<Self> {
64 let old_svd = self.try_svd(true, true, epsilon, max_iterations)?;
65 // We need to sort the singular values in the SVD.
66 let mut sources = [0, 1, 2];
67 sources.sort_unstable_by_key(|&ix| float_ord::FloatOrd(-old_svd.singular_values[ix]));
68 let mut svd = old_svd;
69 for (dest, &source) in sources.iter().enumerate() {
70 svd.singular_values[dest] = old_svd.singular_values[source];
71 svd.u
72 .as_mut()
73 .unwrap()
74 .column_mut(dest)
75 .copy_from(&old_svd.u.as_ref().unwrap().column(source));
76 svd.v_t
77 .as_mut()
78 .unwrap()
79 .row_mut(dest)
80 .copy_from(&old_svd.v_t.as_ref().unwrap().row(source));
81 }
82 // Now that the singular values are sorted, find the closest
83 // essential matrix to E in frobenius form.
84 // This consists of averaging the two non-zero singular values
85 // and zeroing out the near-zero singular value.
86 svd.singular_values[2] = 0.0;
87 let new_singular = (svd.singular_values[0] + svd.singular_values[1]) / 2.0;
88 svd.singular_values[0] = new_singular;
89 svd.singular_values[1] = new_singular;
90 // Cannot fail because we asked for both U and V* on decomp.
91 let mat = svd.recompose().unwrap();
92 Some(Self(mat))
93 }
94
95 /// Returns two possible rotations for the essential matrix along with a translation
96 /// bearing of arbitrary length. The translation bearing is not yet in the correct
97 /// space and the inverse rotation (transpose) must be multiplied by the translation
98 /// bearing to make the translation bearing be post-rotation. The translation's length
99 /// is unknown and of unknown sign and must be solved for by using a prior.
100 ///
101 /// `epsilon` is the threshold by which the singular value decomposition is considered
102 /// complete. Making this smaller may improve the precision. It is recommended to
103 /// set this to no higher than `1e-6`.
104 ///
105 /// `max_iterations` is the maximum number of iterations that singular value decomposition
106 /// will run on this matrix. Use this in soft realtime systems to cap the execution time.
107 /// A `max_iterations` of `0` may execute indefinitely and is not recommended.
108 ///
109 /// ```
110 /// use cv_core::CameraToCamera;
111 /// use cv_core::nalgebra::{IsometryMatrix3, Rotation3, Vector3};
112 /// use cv_pinhole::EssentialMatrix;
113 /// let pose = CameraToCamera(IsometryMatrix3::from_parts(
114 /// Vector3::new(-0.8, 0.4, 0.5).into(),
115 /// Rotation3::from_euler_angles(0.2, 0.3, 0.4),
116 /// ));
117 /// // Get the possible poses for the essential matrix created from `pose`.
118 /// let (rot_a, rot_b, t) = EssentialMatrix::from(pose).possible_rotations_unscaled_translation(1e-6, 50).unwrap();
119 /// // Compute residual rotations.
120 /// let a_res = rot_a.rotation_to(&pose.0.rotation).angle();
121 /// let b_res = rot_b.rotation_to(&pose.0.rotation).angle();
122 /// let a_close = a_res < 1e-4;
123 /// let b_close = b_res < 1e-4;
124 /// // At least one rotation is correct.
125 /// assert!(a_close || b_close);
126 /// // The translation points in the same (or reverse) direction
127 /// let t_res = 1.0 - t.normalize().dot(&pose.0.translation.vector.normalize()).abs();
128 /// assert!(t_res < 1e-4);
129 /// ```
130 pub fn possible_rotations_unscaled_translation(
131 &self,
132 epsilon: f64,
133 max_iterations: usize,
134 ) -> Option<(Rotation3<f64>, Rotation3<f64>, Vector3<f64>)> {
135 let Self(essential) = *self;
136 let essential = essential;
137
138 // `W` from https://en.wikipedia.org/wiki/Essential_matrix#Finding_one_solution.
139 let w = Matrix3::new(0.0, -1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0);
140 // Transpose of `W` from https://en.wikipedia.org/wiki/Essential_matrix#Finding_one_solution.
141 let wt = w.transpose();
142
143 // Perform SVD.
144 let svd = SVD::try_new(essential, true, true, epsilon, max_iterations);
145 // Extract only the U and V matrix from the SVD.
146 let u_v_t = svd.map(|svd| {
147 if let SVD {
148 u: Some(u),
149 v_t: Some(v_t),
150 singular_values,
151 } = svd
152 {
153 // Sort the singular vectors in U and V*.
154 let mut sources: [usize; 3] = [0, 1, 2];
155 sources.sort_unstable_by_key(|&ix| float_ord::FloatOrd(-singular_values[ix]));
156 let mut sorted_u = Matrix3::zeros();
157 let mut sorted_v_t = Matrix3::zeros();
158 for (&ix, mut column) in sources.iter().zip(sorted_u.column_iter_mut()) {
159 column.copy_from(&u.column(ix));
160 }
161 for (&ix, mut row) in sources.iter().zip(sorted_v_t.row_iter_mut()) {
162 row.copy_from(&v_t.row(ix));
163 }
164 (sorted_u, sorted_v_t)
165 } else {
166 panic!("Didn't get U and V matrix in SVD");
167 }
168 });
169 // Force the determinants to be positive. This is done to ensure the
170 // handedness of the rotation matrix is correct.
171 let u_v_t = u_v_t.map(|(mut u, mut v_t)| {
172 // Last column of U is undetermined since d = (a a 0).
173 if u.determinant() < 0.0 {
174 for n in u.column_mut(2).iter_mut() {
175 *n *= -1.0;
176 }
177 }
178 // Last row of Vt is undetermined since d = (a a 0).
179 if v_t.determinant() < 0.0 {
180 for n in v_t.row_mut(2).iter_mut() {
181 *n *= -1.0;
182 }
183 }
184 // Return positive determinant U and V*.
185 (u, v_t)
186 });
187 // Compute the possible rotations and the bearing with no normalization.
188 u_v_t.map(|(u, v_t)| {
189 (
190 Rotation3::from_matrix_unchecked(u * w * v_t),
191 Rotation3::from_matrix_unchecked(u * wt * v_t),
192 u.column(2).into_owned(),
193 )
194 })
195 }
196
197 /// See [`EssentialMatrix::possible_rotations_unscaled_translation`].
198 ///
199 /// This returns only the two rotations that are possible.
200 ///
201 /// ```
202 /// use cv_core::CameraToCamera;
203 /// use cv_core::nalgebra::{IsometryMatrix3, Rotation3, Vector3};
204 /// use cv_pinhole::EssentialMatrix;
205 /// let pose = CameraToCamera(IsometryMatrix3::from_parts(
206 /// Vector3::new(-0.8, 0.4, 0.5).into(),
207 /// Rotation3::from_euler_angles(0.2, 0.3, 0.4),
208 /// ));
209 /// // Get the possible rotations for the essential matrix created from `pose`.
210 /// let rbs = EssentialMatrix::from(pose).possible_rotations(1e-6, 50).unwrap();
211 /// let one_correct = rbs.iter().any(|&rot| {
212 /// let angle_residual = rot.rotation_to(&pose.0.rotation).angle();
213 /// angle_residual < 1e-4
214 /// });
215 /// assert!(one_correct);
216 /// ```
217 pub fn possible_rotations(
218 &self,
219 epsilon: f64,
220 max_iterations: usize,
221 ) -> Option<[Rotation3<f64>; 2]> {
222 self.possible_rotations_unscaled_translation(epsilon, max_iterations)
223 .map(|(rot_a, rot_b, _)| [rot_a, rot_b])
224 }
225
226 /// See [`EssentialMatrix::possible_rotations_unscaled_translation`].
227 ///
228 /// This returns the rotations and their corresponding post-rotation translation bearing.
229 ///
230 /// ```
231 /// use cv_core::CameraToCamera;
232 /// use cv_core::nalgebra::{IsometryMatrix3, Rotation3, Vector3};
233 /// use cv_pinhole::EssentialMatrix;
234 /// let pose = CameraToCamera(IsometryMatrix3::from_parts(
235 /// Vector3::new(-0.8, 0.4, 0.5).into(),
236 /// Rotation3::from_euler_angles(0.2, 0.3, 0.4),
237 /// ));
238 /// // Get the possible poses for the essential matrix created from `pose`.
239 /// let rbs = EssentialMatrix::from(pose).possible_unscaled_poses(1e-6, 50).unwrap();
240 /// let one_correct = rbs.iter().any(|&upose| {
241 /// let angle_residual =
242 /// upose.0.rotation.rotation_to(&pose.0.rotation).angle();
243 /// let translation_residual =
244 /// 1.0 - upose.0.translation.vector.normalize()
245 /// .dot(&pose.0.translation.vector.normalize());
246 /// angle_residual < 1e-4 && translation_residual < 1e-4
247 /// });
248 /// assert!(one_correct);
249 /// ```
250 pub fn possible_unscaled_poses(
251 &self,
252 epsilon: f64,
253 max_iterations: usize,
254 ) -> Option<[CameraToCamera; 4]> {
255 self.possible_rotations_unscaled_translation(epsilon, max_iterations)
256 .map(|(rot_a, rot_b, t)| {
257 [
258 CameraToCamera::from_parts(t, rot_a),
259 CameraToCamera::from_parts(t, rot_b),
260 CameraToCamera::from_parts(-t, rot_a),
261 CameraToCamera::from_parts(-t, rot_b),
262 ]
263 })
264 }
265
266 /// Same as [`EssentialMatrix::possible_unscaled_poses`], but it doesn't return
267 /// 4 unscaled poses since it doesn't bother to give back the different translation
268 /// directions and instead only gives one. This is useful if your algorithm doesn't
269 /// care about the direction of translation.
270 pub fn possible_unscaled_poses_bearing(
271 &self,
272 epsilon: f64,
273 max_iterations: usize,
274 ) -> Option<[CameraToCamera; 2]> {
275 self.possible_rotations_unscaled_translation(epsilon, max_iterations)
276 .map(|(rot_a, rot_b, t)| {
277 [
278 CameraToCamera::from_parts(t, rot_a),
279 CameraToCamera::from_parts(t, rot_b),
280 ]
281 })
282 }
283
284 /// See [`PoseSolver`].
285 ///
286 /// Creates a solver that allows you to solve for the pose using correspondences.
287 /// The pose may be scaled or unscaled, and if the `alloc` feature is enabled, you
288 /// can retrieve the inliers as well.
289 pub fn pose_solver(&self) -> PoseSolver<'_> {
290 PoseSolver {
291 essential: self,
292 epsilon: 1e-9,
293 max_iterations: 100,
294 consensus_ratio: 0.5,
295 }
296 }
297}
298
299/// Generates an essential matrix corresponding to this relative camera pose.
300///
301/// If a point `a` is transformed using [`Pose::transform`] into
302/// a point `b`, then the essential matrix returned by this method will
303/// give a residual of approximately `0.0` when you call
304/// `essential.residual(&FeatureMatch(a, b))`.
305///
306/// See the documentation of [`EssentialMatrix`] for more information.
307impl From<CameraToCamera> for EssentialMatrix {
308 fn from(pose: CameraToCamera) -> Self {
309 Self(pose.0.translation.vector.cross_matrix() * *pose.0.rotation.matrix())
310 }
311}
312
313impl Model<FeatureMatch<NormalizedKeyPoint>> for EssentialMatrix {
314 fn residual(&self, data: &FeatureMatch<NormalizedKeyPoint>) -> f64 {
315 let Self(mat) = *self;
316 let FeatureMatch(a, b) = data;
317 let normalized = |p: &NormalizedKeyPoint| {
318 let p = p.bearing_unnormalized();
319 p / p.z
320 };
321
322 // The result is a 1x1 matrix which we must get element 0 from.
323 Float::abs((normalized(b).transpose() * mat * normalized(a))[0])
324 }
325}
326
/// Allows pose solving to be parameterized if defaults are not sufficient.
#[derive(Copy, Clone, Debug)]
pub struct PoseSolver<'a> {
    /// The essential matrix whose relative pose is being recovered.
    essential: &'a EssentialMatrix,
    /// Convergence threshold passed to SVD (`1e-9` when built via `pose_solver`).
    epsilon: f64,
    /// Iteration cap passed to SVD (`100` when built via `pose_solver`).
    max_iterations: usize,
    /// Fraction of correspondences that must agree with the chosen pose
    /// (`0.5` when built via `pose_solver`).
    consensus_ratio: f64,
}
335
336impl<'a> PoseSolver<'a> {
337 /// Set the epsilon to be used in SVD.
338 pub fn epsilon(self, epsilon: f64) -> Self {
339 Self { epsilon, ..self }
340 }
341
342 /// Set the max number of iterations to be used in SVD.
343 pub fn max_iterations(self, max_iterations: usize) -> Self {
344 Self {
345 max_iterations,
346 ..self
347 }
348 }
349
350 /// Set the level of agreement required for the pose to be accepted.
351 pub fn consensus_ratio(self, consensus_ratio: f64) -> Self {
352 Self {
353 consensus_ratio,
354 ..self
355 }
356 }
357
358 /// Return the [`CameraToCamera`] that transforms a [`CameraPoint`](crate::CameraPoint) of image
359 /// A (source of `a`) to the corresponding [`CameraPoint`](crate::CameraPoint) of image B (source of `b`).
360 /// The function takes an iterator over [`FeatureMatch`] from A to B.
361 /// The translation scale is unknown of the returned pose.
362 ///
363 /// * `depth` - The actual depth (`z` axis, not distance) of normalized keypoint `a`
364 /// * `a` - A keypoint from image `A`
365 /// * `b` - A keypoint from image `B`
366 ///
367 /// `self` must satisfy the constraint:
368 ///
369 /// ```text
370 /// transpose(homogeneous(a)) * E * homogeneous(b) = 0
371 /// ```
372 ///
373 /// Also, `a` and `b` must be a correspondence.
374 ///
375 /// This will take the average translation over the entire iterator. This is done
376 /// to smooth out noise and outliers (if present).
377 ///
378 /// `epsilon` is a small value to which SVD must converge to before terminating.
379 ///
380 /// `max_iterations` is the maximum number of iterations that SVD will run on this
381 /// matrix. Use this to cap the execution time.
382 /// A `max_iterations` of `0` may execute indefinitely and is not recommended except
383 /// for non-production code.
384 ///
385 /// `consensus_ratio` is the ratio of points which must be in front of the camera for the model
386 /// to be accepted and return Some. Otherwise, None is returned. Set this to about
387 /// `0.45` to have approximate majority consensus.
388 ///
389 /// `bearing_scale` is a function that is passed a translation bearing vector,
390 /// an untranslated (but rotated) camera point, and a normalized key point
391 /// where the actual point exists. It must return the scalar which the
392 /// translation bearing vector must by multiplied by to get the actual translation.
393 /// It may return `None` if it fails.
394 ///
395 /// `correspondences` must provide an iterator of tuples containing the matches
396 /// of a 3d `CameraPoint` `a` from camera A and the matching `NormalizedKeyPoint`
397 /// `b` from camera B.
398 ///
399 /// This does not communicate which points were outliers to each model.
400 pub fn solve_unscaled(
401 &self,
402 correspondences: impl Iterator<Item = FeatureMatch<NormalizedKeyPoint>>,
403 ) -> Option<CameraToCamera> {
404 // Get the possible rotations and the translation
405 self.essential
406 .possible_unscaled_poses(self.epsilon, self.max_iterations)
407 .and_then(|poses| {
408 // Get the net translation scale of points that agree with a and b
409 // in addition to the number of points that agree with a and b.
410 let (ts, total) = correspondences.fold(
411 ([0usize; 4], 0usize),
412 |(mut ts, total), FeatureMatch(a, b)| {
413 let trans_and_agree = |pose: CameraToCamera| {
414 // Put the second camera position back into the first camera's frame of reference.
415 let p = -(pose.0.rotation.inverse() * pose.0.translation.vector);
416 let a = a.virtual_image_point().coords;
417 // Transform the bearing B back into camera A's space (its a vector, so only rotation is applied).
418 let b = pose.0.rotation.inverse() * b.virtual_image_point().coords;
419 let a_squared = a.norm_squared();
420 let b_squared = b.norm_squared();
421 let a_b = a.dot(&b);
422 let a_pos = a.dot(&p);
423 let b_pos = b.dot(&p);
424
425 // Check chirality constraint.
426 b_squared * a_pos - a_b * b_pos > 0.0
427 && a_b * a_pos - a_squared * b_pos > 0.0
428 };
429
430 // Do it for all poses.
431 for (tn, &pose) in ts.iter_mut().zip(&poses) {
432 if trans_and_agree(pose) {
433 *tn += 1;
434 }
435 }
436
437 (ts, total + 1)
438 },
439 );
440
441 // Ensure that there is at least one point.
442 if total == 0 {
443 return None;
444 }
445
446 // Ensure that the best one exceeds the consensus ratio.
447 let (ix, best) = ts
448 .iter()
449 .copied()
450 .enumerate()
451 .max_by_key(|&(_, t)| t)
452 .unwrap();
453 if (best as f64) < self.consensus_ratio * total as f64 && best != 0 {
454 return None;
455 }
456
457 Some(poses[ix])
458 })
459 }
460
461 /// Same as [`PoseSolver::solve_unscaled`], but also communicates the inliers.
462 ///
463 /// The `alloc` feature must be enabled to use this method.
464 #[cfg(feature = "alloc")]
465 pub fn solve_unscaled_inliers(
466 &self,
467 correspondences: impl Iterator<Item = FeatureMatch<NormalizedKeyPoint>>,
468 ) -> Option<(CameraToCamera, alloc::vec::Vec<usize>)> {
469 // Get the possible rotations and the translation
470 self.essential
471 .possible_unscaled_poses(self.epsilon, self.max_iterations)
472 .and_then(|poses| {
473 // Get the net translation scale of points that agree with a and b
474 // in addition to the number of points that agree with a and b.
475 let (mut ts, total) = correspondences.enumerate().fold(
476 (
477 [
478 (0usize, alloc::vec::Vec::new()),
479 (0usize, alloc::vec::Vec::new()),
480 (0usize, alloc::vec::Vec::new()),
481 (0usize, alloc::vec::Vec::new()),
482 ],
483 0usize,
484 ),
485 |(mut ts, total), (ix, FeatureMatch(a, b))| {
486 let trans_and_agree = |pose: CameraToCamera| {
487 // Put the second camera position back into the first camera's frame of reference.
488 let p = -(pose.0.rotation.inverse() * pose.0.translation.vector);
489 let a = a.virtual_image_point().coords;
490 // Transform the bearing B back into camera A's space (its a vector, so only rotation is applied).
491 let b = pose.0.rotation.inverse() * b.virtual_image_point().coords;
492 let a_squared = a.norm_squared();
493 let b_squared = b.norm_squared();
494 let a_b = a.dot(&b);
495 let a_pos = a.dot(&p);
496 let b_pos = b.dot(&p);
497
498 // Check chirality constraint.
499 b_squared * a_pos - a_b * b_pos > 0.0
500 && a_b * a_pos - a_squared * b_pos > 0.0
501 };
502
503 // Do it for all poses.
504 for ((tn, ti), &pose) in ts.iter_mut().zip(&poses) {
505 if trans_and_agree(pose) {
506 *tn += 1;
507 ti.push(ix);
508 }
509 }
510
511 (ts, total + 1)
512 },
513 );
514
515 // Ensure that there is at least one point.
516 if total == 0 {
517 return None;
518 }
519
520 // Ensure that the best one exceeds the consensus ratio.
521 let (ix, best) = ts
522 .iter()
523 .map(|&(tn, _)| tn)
524 .enumerate()
525 .max_by_key(|&(_, t)| t)
526 .unwrap();
527 if (best as f64) < self.consensus_ratio * total as f64 && best != 0 {
528 return None;
529 }
530
531 let inliers = core::mem::take(&mut ts[ix].1);
532
533 Some((poses[ix], inliers))
534 })
535 }
536}