sequoia_git/verify.rs
1//! Commit-tree traversal and verification.
2
3use std::{
4 borrow::Cow,
5 path::{Path, PathBuf},
6 collections::{
7 BTreeMap,
8 BTreeSet,
9 },
10};
11
12use anyhow::{anyhow, Context, Result};
13use git2::{
14 Repository,
15 Oid,
16};
17
18use sequoia_openpgp::{
19 self as openpgp,
20 Cert,
21 cert::{
22 amalgamation::ValidAmalgamation,
23 CertParser,
24 },
25 Fingerprint,
26 packet::{Signature, UserID},
27 parse::Parse,
28 policy::StandardPolicy,
29 types::{
30 HashAlgorithm,
31 RevocationStatus,
32 RevocationType,
33 },
34};
35
36use crate::{
37 Error,
38 Policy,
39 persistent_set,
40};
41
42/// Whether to trace execution by default (on stderr).
43const TRACE: bool = false;
44
45#[derive(Default, Debug)]
46pub struct VerificationResult {
47 pub signer_keys: BTreeSet<openpgp::Fingerprint>,
48 pub primary_uids: BTreeSet<UserID>,
49}
50
51pub fn verify(git: &Repository,
52 trust_root: Oid,
53 shadow_policy: Option<&[u8]>,
54 commit_range: (Oid, Oid),
55 results: &mut VerificationResult,
56 keep_going: bool,
57 mut verify_cb: impl FnMut(&Oid, Option<&Oid>, &crate::Result<Vec<crate::Result<(String, Signature, Cert, Fingerprint)>>>) -> crate::Result<()>,
58 cache: &mut VerificationCache,
59 quiet: bool, verbose: bool)
60 -> Result<()>
61{
62 tracer!(TRACE, "verify");
63 t!("verify(_, {}, {}..{})", trust_root, commit_range.0, commit_range.1);
64
65 if shadow_policy.is_some() {
66 t!("Using a shadow policy to verify commits.");
67 } else {
68 t!("Using in-band policies to verify commits.");
69 }
70
71 // XXX: These should be passed in as arguments.
72 let p: &StandardPolicy = &StandardPolicy::new();
73 let now = std::time::SystemTime::now();
74
75 // STRATEGY
76 //
77 // We want to determine if we can authenticate the target commit
78 // starting from the trust root. A simple approach is to try each
79 // possible path. Unfortunately, this is exponential in the
80 // number of merges. Consider a project where development is done
81 // on feature branches, and then merged using a merge commit. The
82 // commit graph will look like:
83 //
84 // ```
85 // o <- Merge commit
86 // | \
87 // | o <- Feature branch
88 // | /
89 // o <- Merge commit
90 // | \
91 // | o <- Feature branch
92 // | /
93 // o <- Merge commit
94 // | \
95 // ...
96 // ```
97 //
98 // After 100 such merges, there are 2**100 different paths.
99 // That's intractable.
100 //
101 // Happily, we can do better. We can determine if there is an
102 // authenticated path as a byproduct of a topological walk. This
103 // approach limits both the work we have to do, and the space we
104 // require to O(N), where N is the number of commits preceding the
105 // target commit in the commit graph. (If the trust root is a
106 // dominator, which is usually the case, then N is the number of
107 // commits that precede target, and follow the trust root, which
108 // is often very small.)
109 //
110 // Recall the following facts:
111 //
112 // - A commit is authenticated if it is the trust root, or at
113 // least one parent's policy authenticates the commit.
114 // - Unless the signer's certificate is hard revoked.
115 // - Unless there is an authenticated suffix that
116 // immediately follows the commit, and a commit on that
117 // authenticated suffix goodlists commit.
118 //
119 // In order to check the last condition, we need to know the
120 // goodlists of all the following, authenticated commits when we
121 // process the commit. When we do a topological walk, we know
122 // that when we visit a commit, we've already visited all of its
123 // ancestors. This means that we can easily collect the goodlists
124 // of all of the commits on authenticated paths from the commit to
125 // the target during the walk by propagating good lists from
126 // authenticated commits to their parents. Then, when we are
127 // examining a commit, and we discover that it has been revoked,
128 // we can immediately check if it is on a relevant goodlist.
129 //
130 // To do the topological walk, we first have to do a bit of
131 // preparation. Since we don't know a commit's children by
132 // looking at the commit (and not all children lead to the
133 // target), we have to extract that information from the graph.
134 // We do this by visiting all commits on paths from the target to
135 // the trust root, which we can do in O(N) space and time by doing
136 // a breath first search. When we visit a commit, we increment
137 // the number of children each of its parents has. We also use
138 // this opportunity to discover any certificates that have been
139 // hard revoked.
140 //
141 // Then we do the topological walk. For each commit, we consider
142 // the number of unprocessed children to be the number of recorded
143 // children. We then visit commits that have no unprocessed
144 // children.
145 //
146 // When we visit a commit, and there is an authenticated path from
147 // it to the target, we try to authenticate the commit. That is,
148 // for each parent, we check if the parent can authenticate the
149 // commit. If a parent authenticates the commit, but the signer's
150 // certificate has been revoked, we can immediately check whether
151 // the commit is on a goodlist. For each parent that
152 // authenticated the commit, we merge the commit's *aggregate*
153 // goodlist into the parent's goodlist. Finally, for each parent,
154 // we decrement the number of unprocessed children. If there are
155 // no unprocessed children left, it is added to a pending queue,
156 // so that it can be processed.
157
158 // Return the policy associated with the commit.
159 let read_policy = |commit: &Oid| -> Result<Cow<[u8]>> {
160 if let Some(p) = &shadow_policy {
161 Ok(Cow::Borrowed(p))
162 } else {
163 match Policy::read_bytes_from_commit(git, commit) {
164 Ok(policy) => Ok(Cow::Owned(policy)),
165 Err(err) => {
166 if let Error::MissingPolicy(_) = err {
167 Ok(Cow::Borrowed(b""))
168 } else {
169 Err(err.into())
170 }
171 }
172 }
173 }
174 };
175
176 // Whether we've seen the policy file before. The key is the
177 // SHA512 digest of the policy file.
178 let mut policy_files: BTreeSet<Vec<u8>> = Default::default();
179 // Any hard revoked certificates.
180 let mut hard_revoked: BTreeSet<Fingerprint> = Default::default();
181
182 // Scans the specified commit's policy for hard revocations, and
183 // adds them to hard_revoked.
184 let mut scan_policy = |commit_id| -> Result<()> {
185 let policy_bytes = read_policy(&commit_id)?;
186 let policy_hash = sha512sum(&policy_bytes)?;
187 if policy_files.contains(&policy_hash) {
188 t!("Already scanned an identical copy of {}'s policy, skipping.",
189 commit_id);
190 return Ok(());
191 }
192 t!("Scanning {}'s policy for hard revocations", commit_id);
193
194 let policy = Policy::parse_bytes(&policy_bytes)?;
195 policy_files.insert(policy_hash);
196
197 // Scan for revoked certificates.
198 for authorization in policy.authorization().values() {
199 for cert in CertParser::from_bytes(&authorization.keyring)? {
200 let cert = if let Ok(cert) = cert {
201 cert
202 } else {
203 continue;
204 };
205
206 let vc = if let Ok(vc) = cert.with_policy(p, Some(now)) {
207 vc
208 } else {
209 continue;
210 };
211
212 let is_hard_revoked = |rs| {
213 if let RevocationStatus::Revoked(revs) = rs {
214 revs.iter().any(|rev| {
215 if let Some((reason, _)) = rev.reason_for_revocation() {
216 reason.revocation_type() == RevocationType::Hard
217 } else {
218 true
219 }
220 })
221 } else {
222 false
223 }
224 };
225
226 // Check if the certificate is hard revoked.
227 if is_hard_revoked(vc.revocation_status()) {
228 t!("Certificate {} is hard revoked, bad listing",
229 cert.fingerprint());
230 hard_revoked.insert(vc.fingerprint());
231 for k in vc.keys().subkeys().for_signing() {
232 hard_revoked.insert(k.key().fingerprint());
233 t!(" Badlisting signing key {}",
234 k.key().fingerprint());
235 }
236
237 continue;
238 }
239
240 // Check if any of the signing keys are hard revoked.
241 for k in vc.keys().subkeys().for_signing() {
242 if is_hard_revoked(k.revocation_status()) {
243 hard_revoked.insert(k.key().fingerprint());
244 t!(" Signing key {} hard revoked, bad listing",
245 k.key().fingerprint());
246 }
247 }
248 }
249 }
250
251 Ok(())
252 };
253
254 let middle = if trust_root == commit_range.0 {
255 None
256 } else {
257 Some(commit_range.0)
258 };
259
260 struct Commit {
261 // Number of unprocessed children (commits following this
262 // one).
263 unprocessed_children: usize,
264
265 // Whether there is an authenticated path from this node to
266 // the target.
267 authenticated_suffix: bool,
268
269 // Whether we still need to go via MIDDLE.
270 traversed_middle: bool,
271 }
272
273 impl Default for Commit {
274 fn default() -> Self {
275 Commit {
276 unprocessed_children: 0,
277 authenticated_suffix: false,
278 traversed_middle: false,
279 }
280 }
281 }
282
283 let mut commits: BTreeMap<Oid, Commit> = Default::default();
284 if trust_root != commit_range.1 {
285 commits.insert(
286 commit_range.1.clone(),
287 Commit {
288 unprocessed_children: 0,
289 authenticated_suffix: true,
290 traversed_middle: middle.is_none(),
291 });
292 }
293
294 // We walk the tree from the target to the trust root (using a
295 // breath first search, but it doesn't matter), and fill in
296 // COMMITS.
297 {
298 // Commits that we haven't processed yet.
299 let mut pending: BTreeSet<Oid> = Default::default();
300 pending.insert(commit_range.1.clone());
301
302 // Commits that we've processed.
303 let mut processed: BTreeSet<Oid> = Default::default();
304
305 while let Some(commit_id) = pending.pop_first() {
306 processed.insert(commit_id);
307
308 let commit = git.find_commit(commit_id)?;
309
310 // Don't fail if we can't parse the policy file.
311 let _ = scan_policy(commit_id);
312
313 if commit_id == trust_root {
314 // This is the trust root. There is no need to go
315 // further.
316 continue;
317 }
318
319 for parent in commit.parents() {
320 let parent_id = parent.id();
321
322 // Add an entry for the parent commit (if we haven't
323 // yet done so).
324 let info = commits.entry(parent_id).or_default();
325 info.unprocessed_children += 1;
326
327 if ! processed.contains(&parent_id)
328 && ! pending.contains(&parent_id)
329 {
330 pending.insert(parent_id);
331 }
332 }
333 }
334 }
335
336 // The union of the commit goodlists of commits on an
337 // authenticated suffix (not including this commit). We build
338 // this up as we authenticate commits. Since we do a topological
339 // walk, this will be complete for a given commit when we process
340 // that commit.
341 let mut descendant_goodlist: BTreeMap<Oid, BTreeSet<String>>
342 = Default::default();
343
344 let mut errors = Vec::new();
345 let mut unauthenticated_commits: BTreeSet<Oid> = Default::default();
346 let mut authenticated_commits: BTreeSet<Oid> = Default::default();
347
348 // Authenticate the commit using the specified parent.
349 //
350 // NOTE: This must only be called on a commit, if the commit is on
351 // an authenticated suffix!!!
352 let mut authenticate_commit = |commit_id, parent_id| -> Result<bool> {
353 let parent_policy = read_policy(&parent_id)?;
354 let parent_id = if commit_id == parent_id {
355 // This is only the case when verifying the trust root
356 // using the shadow policy.
357 assert_eq!(commit_id, trust_root);
358 assert!(shadow_policy.is_some());
359 None
360 } else {
361 Some(&parent_id)
362 };
363
364 // The current commit's good list.
365 let mut commit_goodlist = BTreeSet::new();
366
367 // XXX: If we have some certificates that are hard revoked,
368 // then we can't use the cache. This is because the cache
369 // doesn't tell us what certificate was used to sign the
370 // commit, which means we can't figure out if the signer's
371 // certificate was revoked when the result is cached.
372 let (vresult, cache_hit) = if hard_revoked.is_empty()
373 && cache.contains(&parent_policy, commit_id)?
374 {
375 (Ok(vec![]), true)
376 } else {
377 let parent_policy = Policy::parse_bytes(&parent_policy)?;
378 let commit_policy = Policy::parse_bytes(read_policy(&commit_id)?)?;
379
380 commit_goodlist = commit_policy.commit_goodlist().clone();
381
382 (parent_policy.verify(git, &commit_id, &commit_policy,
383 &mut results.signer_keys,
384 &mut results.primary_uids),
385 false)
386 };
387
388 if let Err(err) = verify_cb(&commit_id, parent_id, &vresult) {
389 t!("verify_cb -> {}", err);
390 return Err(err.into());
391 }
392
393 if cache_hit {
394 // XXX: communicate this to the caller
395 // instead of println.
396 let id = if let Some(parent_id) = parent_id {
397 format!("{}..{}", parent_id, commit_id)
398 } else {
399 commit_id.to_string()
400 };
401 if verbose {
402 println!("{}:\n Cached positive verification", id);
403 }
404 }
405
406 match vresult {
407 Ok(results) => {
408 // Whether the parent authenticated the commit.
409 let mut good = false;
410 // Whether the commit was goodlisted by a later
411 // commit.
412 let mut goodlisted = false;
413
414 // Whether the commit was goodlisted by the parent's
415 // policy. (Because commits form a Merkle tree, this
416 // is only possible when we are using a shadow
417 // policy.)
418 if ! cache_hit && results.is_empty() {
419 // XXX: communicate this to the caller
420 // instead of eprintln.
421 if verbose {
422 println!("{}: Explicitly goodlisted", commit_id);
423 }
424 good = true;
425 }
426
427 for r in results {
428 match r {
429 Ok((_, _sig, cert, signer_fpr)) => {
430 // It looks good, but make sure the
431 // certificate was not revoked.
432 if hard_revoked.contains(&signer_fpr) {
433 t!("Cert {}{} used to sign {} is revoked.",
434 cert.fingerprint(),
435 if cert.fingerprint() != signer_fpr {
436 format!(", key {}", signer_fpr)
437 } else {
438 "".to_string()
439 },
440 commit_id);
441
442 // It was revoked, but perhaps the
443 // commit was goodlisted.
444 if descendant_goodlist.get(&commit_id)
445 .map(|goodlist| {
446 t!(" Goodlist contains: {}",
447 goodlist
448 .iter().cloned()
449 .collect::<Vec<String>>()
450 .join(", "));
451 goodlist.contains(&commit_id.to_string())
452 })
453 .unwrap_or(false)
454 {
455 t!("But the commit was goodlisted, \
456 so all is good.");
457 goodlisted = true;
458 }
459 } else {
460 t!("{} has a good signature from {}",
461 commit_id, cert.fingerprint());
462 good = true;
463 }
464 }
465 Err(e) => errors.push(
466 anyhow::Error::from(e).context(
467 format!("While verifying commit {}",
468 commit_id))),
469 }
470 }
471
472 // We do NOT insert into the cache if the commit was
473 // goodlisted. The cache is a function of the parent
474 // policy and the children policy; goodlisting is a
475 // function of commit range.
476 if ! cache_hit && good && ! goodlisted {
477 cache.insert(&parent_policy, commit_id)?;
478 }
479
480 if cache_hit || good || goodlisted {
481 // Merge the commit's goodlist into the parent's
482 // goodlist.
483 if let Some(descendant_goodlist)
484 = descendant_goodlist.get(&commit_id)
485 {
486 commit_goodlist.extend(descendant_goodlist.iter().cloned());
487 };
488
489 if let Some(parent_id) = parent_id {
490 if let Some(p_goodlist)
491 = descendant_goodlist.get_mut(&parent_id)
492 {
493 p_goodlist.extend(commit_goodlist.into_iter());
494 } else if ! commit_goodlist.is_empty() {
495 descendant_goodlist.insert(
496 parent_id.clone(), commit_goodlist);
497 }
498 }
499 }
500
501 let authenticated = cache_hit || good || goodlisted;
502 if authenticated {
503 authenticated_commits.insert(commit_id);
504 } else {
505 unauthenticated_commits.insert(commit_id);
506 }
507 Ok(authenticated)
508 },
509 Err(e) => {
510 unauthenticated_commits.insert(commit_id);
511 errors.push(anyhow::Error::from(e).context(
512 format!("While verifying commit {}", commit_id)));
513 Ok(false)
514 },
515 }
516 };
517
518 // We now do a topological walk from the target to the trust root.
519
520 // Assume there is no path until we prove otherwise. (A
521 // zero-length path is already valid.)
522 let mut valid_path = trust_root == commit_range.0
523 && commit_range.0 == commit_range.1;
524
525 // Commits that we haven't processed yet and have no unprocessed
526 // children.
527 let mut pending: BTreeSet<Oid> = Default::default();
528 if trust_root != commit_range.1 {
529 pending.insert(commit_range.1.clone());
530 }
531
532 'authentication: while let Some(commit_id) = pending.pop_first() {
533 let commit = git.find_commit(commit_id)?;
534
535 t!("Processing {}: {}", commit_id, commit.summary().unwrap_or(""));
536
537 let commit_info = commits.get(&commit_id).expect("added");
538 assert_eq!(commit_info.unprocessed_children, 0);
539 let authenticated_suffix = commit_info.authenticated_suffix;
540 let traversed_middle = commit_info.traversed_middle;
541
542 for (parent_i, parent) in commit.parents().enumerate() {
543 let parent_id = parent.id();
544 t!("Considering {} -> {} (parent #{} of {})",
545 commit_id, parent_id, parent_i + 1, commit.parents().len());
546
547 let parent_is_trust_root = parent_id == trust_root;
548
549 let parent_info = commits.get_mut(&parent_id)
550 .with_context(|| format!("Looking up {}", parent_id))
551 .expect("added");
552 t!(" Parent has {} unprocessed children",
553 parent_info.unprocessed_children);
554 assert!(parent_info.unprocessed_children > 0);
555 parent_info.unprocessed_children -= 1;
556 if parent_info.unprocessed_children == 0 && ! parent_is_trust_root {
557 t!(" Adding parent to pending queue for processing");
558 pending.insert(parent_id);
559 }
560
561 if authenticated_suffix {
562 t!(" Child IS on an authenticated suffix");
563 } else {
564 t!(" Child IS NOT on an authenticated suffix.");
565 }
566
567 let authenticated = if keep_going || authenticated_suffix {
568 authenticate_commit(commit_id, parent_id)
569 .with_context(|| {
570 format!("Authenticating {} with {}",
571 commit_id, parent_id)
572 })?
573 } else {
574 false
575 };
576 t!(" Could {}authenticate commit.",
577 if authenticated { "" } else { "not " });
578
579 if authenticated_suffix && authenticated {
580 t!(" Parent authenticates child");
581 if ! parent_info.authenticated_suffix {
582 t!(" Parent is now part of an authenticated suffix.");
583 }
584 parent_info.authenticated_suffix = true;
585
586 if traversed_middle {
587 parent_info.traversed_middle = true;
588 } else if middle == Some(commit_id) {
589 t!(" Traversed {} on way to trust root.", commit_id);
590 parent_info.traversed_middle = true;
591 }
592
593 if parent_is_trust_root {
594 t!(" Parent is the trust root.");
595
596 if ! parent_info.traversed_middle {
597 t!(" but path was not via {}", middle.unwrap());
598 } else {
599 // This is the trust root and we traversed the
600 // middle commit. There is no need to go
601 // further.
602 valid_path = true;
603 if ! keep_going {
604 break 'authentication;
605 }
606 }
607 }
608 }
609 }
610 }
611
612 t!("No commits pending.");
613
614 // When using a shadow policy, we also authenticate the trust
615 // root with it.
616 if (keep_going || valid_path) && shadow_policy.is_some() {
617 t!("Verifying trust root ({}) using the shadow policy",
618 trust_root);
619 // We verify the trust root using itself? Not quite. We know
620 // that authenticate_commit prefers the shadow policy, and we
621 // know that a shadow policy is set. So this will actually
622 // check that the shadow policy verifies the trust root.
623 if ! authenticate_commit(trust_root, trust_root)? {
624 valid_path = false;
625
626 if let Some(e) = errors.pop() {
627 errors.push(
628 e.context(format!("Could not verify trust root {} \
629 using the specified policy",
630 trust_root)));
631 }
632 }
633 }
634
635 if valid_path {
636 if trust_root == commit_range.0 {
637 if ! quiet {
638 println!(
639 "Verified that there is an authenticated path from the trust root\n\
640 {} to {}.",
641 trust_root, commit_range.1);
642 }
643 } else {
644 if ! quiet {
645 println!(
646 "Verified that there is an authenticated path from the trust root\n\
647 {} via {}\n\
648 to {}.",
649 trust_root, commit_range.0, commit_range.1);
650 }
651 }
652 Ok(())
653 } else {
654 if errors.is_empty() {
655 Err(anyhow!("Could not verify commits {}..{}{}",
656 trust_root,
657 if let Some(middle) = middle {
658 format!("{}..", middle)
659 } else {
660 "".to_string()
661 },
662 commit_range.1))
663 } else {
664 let mut e = errors.swap_remove(0)
665 .context(format!("Could not verify commits {}..{}{}",
666 commit_range.0,
667 if let Some(middle) = middle {
668 format!("{}..", middle)
669 } else {
670 "".to_string()
671 },
672 commit_range.1));
673 if ! errors.is_empty() {
674 e = e.context(
675 format!("{} errors occurred while verifying the commits. \
676 {} commits couldn't be authenticated. \
677 Note: not all errors are fatal. \
678 The first error is shown:",
679 errors.len() + 1,
680 unauthenticated_commits.difference(
681 &authenticated_commits).count()));
682 }
683 Err(e)
684 }
685 }
686}
687
688// Returns the SHA512 digest of the provided bytes.
689fn sha512sum(bytes: &[u8]) -> Result<Vec<u8>> {
690 let mut digest = HashAlgorithm::SHA512.context()?.for_digest();
691 digest.update(bytes);
692
693 let mut key = Vec::with_capacity(32);
694 digest.digest(&mut key)?;
695 Ok(key)
696}
697
698pub struct VerificationCache {
699 path: PathBuf,
700 set: persistent_set::Set,
701}
702
703impl VerificationCache {
704 const CONTEXT: &'static str = "SqGitVerify0";
705
706 pub fn new() -> Result<Self> {
707 let p = dirs::cache_dir().ok_or(anyhow::anyhow!("No cache dir"))?
708 .join("sq-git.verification.cache");
709 Self::open(p)
710 }
711
712 pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
713 let path = path.as_ref();
714
715 Ok(VerificationCache {
716 path: path.into(),
717 set: persistent_set::Set::read(&path, Self::CONTEXT)?,
718 })
719 }
720
721 fn key(&self, policy: &[u8], commit: Oid) -> Result<persistent_set::Value> {
722 let mut digest = HashAlgorithm::SHA512.context()?.for_digest();
723 digest.update(policy);
724 digest.update(commit.as_bytes());
725
726 let mut key = [0; 32];
727 digest.digest(&mut key)?;
728
729 Ok(key)
730 }
731
732 /// Returns whether (policy, commit id) is in the cache.
733 ///
734 /// If (policy, commit id) is in the cache, then it was previously
735 /// determined that the policy authenticated the commit.
736 pub fn contains(&mut self, policy: &[u8], commit: Oid) -> Result<bool> {
737 Ok(self.set.contains(&self.key(policy, commit)?)?)
738 }
739
740 /// Add (policy, commit id) to the cache.
741 ///
742 /// If (policy, commit id) is in the cache, this means that the
743 /// policy considers the commit to be authenticated. Normally,
744 /// the policy comes from the parent commit, but it may be a
745 /// shadow policy.
746 pub fn insert(&mut self, policy: &[u8], commit: Oid) -> Result<()> {
747 self.set.insert(self.key(policy, commit)?);
748 Ok(())
749 }
750
751 pub fn persist(&mut self) -> Result<()> {
752 self.set.write(&self.path)?;
753 Ok(())
754 }
755}