sequoia_git/verify.rs
1//! Commit-tree traversal and verification.
2
3use std::{
4 path::{Path, PathBuf},
5 collections::{
6 BTreeMap,
7 BTreeSet,
8 },
9};
10
11use anyhow::{anyhow, Context, Result};
12use git2::{
13 Repository,
14 Oid,
15};
16
17use sequoia_openpgp::{
18 self as openpgp,
19 Cert,
20 cert::{
21 amalgamation::ValidAmalgamation,
22 CertParser,
23 },
24 crypto::hash::Digest,
25 Fingerprint,
26 packet::{Signature, UserID},
27 parse::Parse,
28 policy::StandardPolicy,
29 types::{
30 HashAlgorithm,
31 RevocationStatus,
32 RevocationType,
33 },
34};
35
36use crate::{
37 Policy,
38 persistent_set,
39};
40
41/// Whether to trace execution by default (on stderr).
42const TRACE: bool = false;
43
44#[derive(Default, Debug)]
45pub struct VerificationResult {
46 pub signer_keys: BTreeSet<openpgp::Fingerprint>,
47 pub primary_uids: BTreeSet<UserID>,
48}
49
50pub fn verify(git: &Repository,
51 trust_root: Oid,
52 shadow_policy: Option<&[u8]>,
53 commit_range: (Oid, Oid),
54 results: &mut VerificationResult,
55 keep_going: bool,
56 mut verify_cb: impl FnMut(&Oid, Option<&Oid>, &crate::Result<Vec<crate::Result<(String, Signature, Cert, Fingerprint)>>>) -> crate::Result<()>,
57 cache: &mut VerificationCache)
58 -> Result<()> {
59 tracer!(TRACE, "verify");
60 t!("verify(_, {}, {}..{})", trust_root, commit_range.0, commit_range.1);
61
62 if shadow_policy.is_some() {
63 t!("Using a shadow policy to verify commits.");
64 } else {
65 t!("Using in-band policies to verify commits.");
66 }
67
68 // XXX: These should be passed in as arguments.
69 let p: &StandardPolicy = &StandardPolicy::new();
70 let now = std::time::SystemTime::now();
71
72 // STRATEGY
73 //
74 // We want to determine if we can authenticate the target commit
75 // starting from the trust root. A simple approach is to try each
76 // possible path. Unfortunately, this is exponential in the
77 // number of merges. Consider a project where development is done
78 // on feature branches, and then merged using a merge commit. The
79 // commit graph will look like:
80 //
81 // ```
82 // o <- Merge commit
83 // | \
84 // | o <- Feature branch
85 // | /
86 // o <- Merge commit
87 // | \
88 // | o <- Feature branch
89 // | /
90 // o <- Merge commit
91 // | \
92 // ...
93 // ```
94 //
95 // After 100 such merges, there are 2**100 different paths.
96 // That's intractable.
97 //
98 // Happily, we can do better. We can determine if there is an
99 // authenticated path as a byproduct of a topological walk. This
100 // approach limits both the work we have to do, and the space we
101 // require to O(N), where N is the number of commits preceding the
102 // target commit in the commit graph. (If the trust root is a
103 // dominator, which is usually the case, then N is the number of
104 // commits that precede target, and follow the trust root, which
105 // is often very small.)
106 //
107 // Recall the following facts:
108 //
109 // - A commit is authenticated if it is the trust root, or at
110 // least one parent's policy authenticates the commit.
111 // - Unless the signer's certificate is hard revoked.
112 // - Unless there is an authenticated suffix that
113 // immediately follows the commit, and a commit on that
114 // authenticated suffix goodlists commit.
115 //
116 // In order to check the last condition, we need to know the
117 // goodlists of all the following, authenticated commits when we
118 // process the commit. When we do a topological walk, we know
119 // that when we visit a commit, we've already visited all of its
120 // ancestors. This means that we can easily collect the goodlists
121 // of all of the commits on authenticated paths from the commit to
122 // the target during the walk by propagating good lists from
123 // authenticated commits to their parents. Then, when we are
124 // examining a commit, and we discover that it has been revoked,
125 // we can immediately check if it is on a relevant goodlist.
126 //
127 // To do the topological walk, we first have to do a bit of
128 // preparation. Since we don't know a commit's children by
129 // looking at the commit (and not all children lead to the
130 // target), we have to extract that information from the graph.
131 // We do this by visiting all commits on paths from the target to
132 // the trust root, which we can do in O(N) space and time by doing
133 // a breath first search. When we visit a commit, we increment
134 // the number of children each of its parents has. We also use
135 // this opportunity to discover any certificates that have been
136 // hard revoked.
137 //
138 // Then we do the topological walk. For each commit, we consider
139 // the number of unprocessed children to be the number of recorded
140 // children. We then visit commits that have no unprocessed
141 // children.
142 //
143 // When we visit a commit, and there is an authenticated path from
144 // it to the target, we try to authenticate the commit. That is,
145 // for each parent, we check if the parent can authenticate the
146 // commit. If a parent authenticates the commit, but the signer's
147 // certificate has been revoked, we can immediately check whether
148 // the commit is on a goodlist. For each parent that
149 // authenticated the commit, we merge the commit's *aggregate*
150 // goodlist into the parent's goodlist. Finally, for each parent,
151 // we decrement the number of unprocessed children. If there are
152 // no unprocessed children left, it is added to a pending queue,
153 // so that it can be processed.
154
155 // Return the policy associated with the commit.
156 let read_policy = |commit: &Oid| -> Result<Vec<u8>> {
157 if let Some(p) = &shadow_policy {
158 Ok(p.to_vec())
159 } else {
160 Ok(Policy::read_bytes_from_commit(git, commit)?)
161 }
162 };
163
164 // Whether we've seen the policy file before. The key is the
165 // SHA512 digest of the policy file.
166 let mut policy_files: BTreeSet<Vec<u8>> = Default::default();
167 // Any hard revoked certificates.
168 let mut hard_revoked: BTreeSet<Fingerprint> = Default::default();
169
170 // Scans the specified commit's policy for hard revocations, and
171 // adds them to hard_revoked.
172 let mut scan_policy = |commit_id| -> Result<()> {
173 let policy_bytes = read_policy(&commit_id)?;
174 let policy_hash = sha512sum(&policy_bytes)?;
175 if policy_files.contains(&policy_hash) {
176 t!("Already scanned an identical copy of {}'s policy, skipping.",
177 commit_id);
178 return Ok(());
179 }
180 t!("Scanning {}'s policy for hard revocations", commit_id);
181
182 let policy = Policy::parse_bytes(&policy_bytes)?;
183 policy_files.insert(policy_hash);
184
185 // Scan for revoked certificates.
186 for authorization in policy.authorization().values() {
187 for cert in CertParser::from_bytes(&authorization.keyring)? {
188 let cert = if let Ok(cert) = cert {
189 cert
190 } else {
191 continue;
192 };
193
194 let vc = if let Ok(vc) = cert.with_policy(p, Some(now)) {
195 vc
196 } else {
197 continue;
198 };
199
200 let is_hard_revoked = |rs| {
201 if let RevocationStatus::Revoked(revs) = rs {
202 revs.iter().any(|rev| {
203 if let Some((reason, _)) = rev.reason_for_revocation() {
204 reason.revocation_type() == RevocationType::Hard
205 } else {
206 true
207 }
208 })
209 } else {
210 false
211 }
212 };
213
214 // Check if the certificate is hard revoked.
215 if is_hard_revoked(vc.revocation_status()) {
216 t!("Certificate {} is hard revoked, bad listing",
217 cert.fingerprint());
218 hard_revoked.insert(vc.fingerprint());
219 for k in vc.keys().subkeys().for_signing() {
220 hard_revoked.insert(k.fingerprint());
221 t!(" Badlisting signing key {}",
222 k.fingerprint());
223 }
224
225 continue;
226 }
227
228 // Check if any of the signing keys are hard revoked.
229 for k in vc.keys().subkeys().for_signing() {
230 if is_hard_revoked(k.revocation_status()) {
231 hard_revoked.insert(k.fingerprint());
232 t!(" Signing key {} hard revoked, bad listing",
233 k.fingerprint());
234 }
235 }
236 }
237 }
238
239 Ok(())
240 };
241
242 let middle = if trust_root == commit_range.0 {
243 None
244 } else {
245 Some(commit_range.0)
246 };
247
248 struct Commit {
249 // Number of children (commits derived from this one).
250 children: usize,
251
252 // Whether there is an authenticated path from this node to
253 // the target.
254 authenticated_suffix: bool,
255
256 // Whether we still need to go via MIDDLE.
257 traversed_middle: bool,
258 }
259
260 impl Default for Commit {
261 fn default() -> Self {
262 Commit {
263 children: 0,
264 authenticated_suffix: false,
265 traversed_middle: false,
266 }
267 }
268 }
269
270 let mut commits: BTreeMap<Oid, Commit> = Default::default();
271 if trust_root != commit_range.1 {
272 commits.insert(
273 commit_range.1.clone(),
274 Commit {
275 children: 0,
276 authenticated_suffix: true,
277 traversed_middle: middle.is_none(),
278 });
279 }
280
281 // We walk the tree from the target to the trust root (using a
282 // breath first search, but it doesn't matter), and fill in
283 // COMMITS.CHILDREN.
284 {
285 // Commits that we haven't processed yet.
286 let mut pending: BTreeSet<Oid> = Default::default();
287 pending.insert(commit_range.1.clone());
288
289 // Commits that we've processed.
290 let mut processed: BTreeSet<Oid> = Default::default();
291
292 while let Some(commit_id) = pending.pop_first() {
293 processed.insert(commit_id);
294
295 let commit = git.find_commit(commit_id)?;
296
297 // Don't fail if we can't parse the policy file.
298 let _ = scan_policy(commit_id);
299
300 if commit_id == trust_root {
301 // This is the trust root. There is no need to go
302 // further.
303 continue;
304 }
305
306 for parent in commit.parents() {
307 let parent_id = parent.id();
308
309 let info = commits.entry(parent_id).or_default();
310 info.children += 1;
311
312 if ! processed.contains(&parent_id)
313 && ! pending.contains(&parent_id)
314 {
315 pending.insert(parent_id);
316 }
317 }
318 }
319 }
320
321 // The union of the commit goodlists of commits on an
322 // authenticated suffix (not including this commit). We build
323 // this up as we authenticate commits. Since we do a topological
324 // walk, this will be complete for a given commit when we process
325 // that commit.
326 let mut descendant_goodlist: BTreeMap<Oid, BTreeSet<String>>
327 = Default::default();
328
329 let mut errors = Vec::new();
330 let mut unauthenticated_commits: BTreeSet<Oid> = Default::default();
331 let mut authenticated_commits: BTreeSet<Oid> = Default::default();
332
333 // Authenticate the commit using the specified parent.
334 //
335 // NOTE: This must only be called on a commit, if the commit is on
336 // an authenticated suffix!!!
337 let mut authenticate_commit = |commit_id, parent_id| -> Result<bool> {
338 let parent_policy = read_policy(&parent_id)?;
339 let parent_id = if commit_id == parent_id {
340 // This is only the case when verifying the trust root
341 // using the shadow policy.
342 assert_eq!(commit_id, trust_root);
343 assert!(shadow_policy.is_some());
344 None
345 } else {
346 Some(&parent_id)
347 };
348
349 // The current commit's good list.
350 let mut commit_goodlist = BTreeSet::new();
351
352 // XXX: If we have some certificates that are hard revoked,
353 // then we can't use the cache. This is because the cache
354 // doesn't tell us what certificate was used to sign the
355 // commit, which means we can't figure out if the signer's
356 // certificate was revoked when the result is cached.
357 let (vresult, cache_hit) = if hard_revoked.is_empty()
358 && cache.contains(&parent_policy, commit_id)?
359 {
360 (Ok(vec![]), true)
361 } else {
362 let parent_policy = Policy::parse_bytes(&parent_policy)?;
363 let commit_policy = Policy::parse_bytes(read_policy(&commit_id)?)?;
364
365 commit_goodlist = commit_policy.commit_goodlist().clone();
366
367 (parent_policy.verify(git, &commit_id, &commit_policy,
368 &mut results.signer_keys,
369 &mut results.primary_uids),
370 false)
371 };
372
373 if let Err(err) = verify_cb(&commit_id, parent_id, &vresult) {
374 t!("verify_cb -> {}", err);
375 return Err(err.into());
376 }
377
378 if cache_hit {
379 // XXX: communicate this to the caller
380 // instead of eprintln.
381 let id = if let Some(parent_id) = parent_id {
382 format!("{}..{}", parent_id, commit_id)
383 } else {
384 commit_id.to_string()
385 };
386 eprintln!("{}:\n Cached positive verification", id);
387 }
388
389 match vresult {
390 Ok(results) => {
391 // Whether the parent authenticated the commit.
392 let mut good = false;
393 // Whether the commit was goodlisted by a later
394 // commit.
395 let mut goodlisted = false;
396
397 // Whether the commit was goodlisted by the parent's
398 // policy. (Because commits form a Merkle tree, this
399 // is only possible when we are using a shadow
400 // policy.)
401 if ! cache_hit && results.is_empty() {
402 // XXX: communicate this to the caller
403 // instead of eprintln.
404 eprintln!("{}: Explicitly goodlisted", commit_id);
405 good = true;
406 }
407
408 for r in results {
409 match r {
410 Ok((_, _sig, cert, signer_fpr)) => {
411 // It looks good, but make sure the
412 // certificate was not revoked.
413 if hard_revoked.contains(&signer_fpr) {
414 t!("Cert {}{} used to sign {} is revoked.",
415 cert.fingerprint(),
416 if cert.fingerprint() != signer_fpr {
417 format!(", key {}", signer_fpr)
418 } else {
419 "".to_string()
420 },
421 commit_id);
422
423 // It was revoked, but perhaps the
424 // commit was goodlisted.
425 if descendant_goodlist.get(&commit_id)
426 .map(|goodlist| {
427 t!(" Goodlist contains: {}",
428 goodlist
429 .iter().cloned()
430 .collect::<Vec<String>>()
431 .join(", "));
432 goodlist.contains(&commit_id.to_string())
433 })
434 .unwrap_or(false)
435 {
436 t!("But the commit was goodlisted, \
437 so all is good.");
438 goodlisted = true;
439 }
440 } else {
441 t!("{} has a good signature from {}",
442 commit_id, cert.fingerprint());
443 good = true;
444 }
445 }
446 Err(e) => errors.push(
447 anyhow::Error::from(e).context(
448 format!("While verifying commit {}",
449 commit_id))),
450 }
451 }
452
453 // We do NOT insert into the cache if the commit was
454 // goodlisted. The cache is a function of the parent
455 // policy and the children policy; goodlisting is a
456 // function of commit range.
457 if ! cache_hit && good && ! goodlisted {
458 cache.insert(&parent_policy, commit_id)?;
459 }
460
461 if cache_hit || good || goodlisted {
462 // Merge the commit's goodlist into the parent's
463 // goodlist.
464 if let Some(descendant_goodlist)
465 = descendant_goodlist.get(&commit_id)
466 {
467 commit_goodlist.extend(descendant_goodlist.iter().cloned());
468 };
469
470 if let Some(parent_id) = parent_id {
471 if let Some(p_goodlist)
472 = descendant_goodlist.get_mut(&parent_id)
473 {
474 p_goodlist.extend(commit_goodlist.into_iter());
475 } else if ! commit_goodlist.is_empty() {
476 descendant_goodlist.insert(
477 parent_id.clone(), commit_goodlist);
478 }
479 }
480 }
481
482 let authenticated = cache_hit || good || goodlisted;
483 if authenticated {
484 authenticated_commits.insert(commit_id);
485 } else {
486 unauthenticated_commits.insert(commit_id);
487 }
488 Ok(authenticated)
489 },
490 Err(e) => {
491 unauthenticated_commits.insert(commit_id);
492 errors.push(anyhow::Error::from(e).context(
493 format!("While verifying commit {}", commit_id)));
494 Ok(false)
495 },
496 }
497 };
498
499 // We now do a topological walk from the target to the trust root.
500
501 // Assume there is no path until we prove otherwise. (A
502 // zero-length path is already valid.)
503 let mut valid_path = trust_root == commit_range.0
504 && commit_range.0 == commit_range.1;
505
506 // Commits that we haven't processed yet.
507 let mut pending: BTreeSet<Oid> = Default::default();
508 if trust_root != commit_range.1 {
509 pending.insert(commit_range.1.clone());
510 }
511
512 'authentication: while let Some(commit_id) = pending.pop_first() {
513 let commit = git.find_commit(commit_id)?;
514
515 t!("Processing {}: {}", commit_id, commit.summary().unwrap_or(""));
516
517 let commit_info = commits.get(&commit_id).expect("added");
518 assert_eq!(commit_info.children, 0);
519 let authenticated_suffix = commit_info.authenticated_suffix;
520 let traversed_middle = commit_info.traversed_middle;
521
522 for (parent_i, parent) in commit.parents().enumerate() {
523 let parent_id = parent.id();
524 t!("Considering {} -> {} (parent #{})",
525 commit_id, parent_id, parent_i);
526
527 let parent_info = commits.get_mut(&parent_id).expect("added");
528 t!(" Parent has {} unprocessed children", parent_info.children);
529 assert!(parent_info.children > 0);
530 parent_info.children -= 1;
531
532 if authenticated_suffix {
533 t!(" Child IS on an authenticated suffix");
534 } else {
535 t!(" Child IS NOT on an authenticated suffix.");
536 }
537
538 let authenticated = if keep_going || authenticated_suffix {
539 authenticate_commit(commit_id, parent_id)
540 .with_context(|| {
541 format!("Authenticating {} with {}",
542 commit_id, parent_id)
543 })?
544 } else {
545 false
546 };
547
548 if authenticated_suffix && authenticated {
549 t!(" Parent authenticates child");
550 parent_info.authenticated_suffix = true;
551
552 if traversed_middle {
553 parent_info.traversed_middle = true;
554 } else if middle == Some(commit_id) {
555 t!(" Traversed {} on way to trust root.", commit_id);
556 parent_info.traversed_middle = true;
557 }
558
559 if parent_id == trust_root {
560 t!(" Parent is the trust root.");
561
562 // This is the trust root. There is no need to go
563 // further.
564 if ! parent_info.traversed_middle {
565 t!(" but path was not via {}", middle.unwrap());
566 } else {
567 valid_path = true;
568 }
569 if ! keep_going {
570 break 'authentication;
571 }
572 }
573 }
574
575 if parent_info.children == 0 {
576 t!(" No other unprocessed children.");
577 if parent_id == trust_root {
578 t!(" Parent is the start of the commit range, \
579 so it doesn't need to be processed.");
580 } else if ! keep_going && ! parent_info.authenticated_suffix {
581 t!(" Parent does not authenticate any child, \
582 so it doesn't need to be processed.");
583 } else {
584 t!(" Adding parent to pending.");
585 pending.insert(parent_id);
586 }
587 }
588 }
589 }
590
591 // When using a shadow policy, we also authenticate the trust
592 // root with it.
593 if (keep_going || valid_path) && shadow_policy.is_some() {
594 t!("Verifying trust root ({}) using the shadow policy",
595 trust_root);
596 // We verify the trust root using itself? Not quite. We know
597 // that authenticate_commit prefers the shadow policy, and we
598 // know that a shadow policy is set. So this will actually
599 // check that the shadow policy verifies the trust root.
600 if ! authenticate_commit(trust_root, trust_root)? {
601 valid_path = false;
602
603 if let Some(e) = errors.pop() {
604 errors.push(
605 e.context(format!("Could not verify trust root {} \
606 using the specified policy",
607 trust_root)));
608 }
609 }
610 }
611
612 if valid_path {
613 if trust_root == commit_range.0 {
614 eprintln!(
615 "Verified that there is an authenticated path from the trust root\n\
616 {} to {}.",
617 trust_root, commit_range.1);
618 } else {
619 eprintln!(
620 "Verified that there is an authenticated path from the trust root\n\
621 {} via {}\n\
622 to {}.",
623 trust_root, commit_range.0, commit_range.1);
624 }
625 Ok(())
626 } else {
627 if errors.is_empty() {
628 Err(anyhow!("Could not verify commits {}..{}{}",
629 trust_root,
630 if let Some(middle) = middle {
631 format!("{}..", middle)
632 } else {
633 "".to_string()
634 },
635 commit_range.1))
636 } else {
637 let mut e = errors.swap_remove(0)
638 .context(format!("Could not verify commits {}..{}{}",
639 commit_range.0,
640 if let Some(middle) = middle {
641 format!("{}..", middle)
642 } else {
643 "".to_string()
644 },
645 commit_range.1));
646 if ! errors.is_empty() {
647 e = e.context(
648 format!("{} errors occurred while verifying the commits. \
649 {} commits couldn't be authenticated. \
650 Note: not all errors are fatal. \
651 The first error is shown:",
652 errors.len() + 1,
653 unauthenticated_commits.difference(
654 &authenticated_commits).count()));
655 }
656 Err(e)
657 }
658 }
659}
660
661// Returns the SHA512 digest of the provided bytes.
662fn sha512sum(bytes: &[u8]) -> Result<Vec<u8>> {
663 let mut digest = HashAlgorithm::SHA512.context()?;
664 digest.update(bytes);
665
666 let mut key = Vec::with_capacity(32);
667 digest.digest(&mut key)?;
668 Ok(key)
669}
670
671pub struct VerificationCache {
672 path: PathBuf,
673 set: persistent_set::Set,
674}
675
676impl VerificationCache {
677 const CONTEXT: &'static str = "SqGitVerify0";
678
679 pub fn new() -> Result<Self> {
680 let p = dirs::cache_dir().ok_or(anyhow::anyhow!("No cache dir"))?
681 .join("sq-git.verification.cache");
682 Self::open(p)
683 }
684
685 pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
686 let path = path.as_ref();
687
688 Ok(VerificationCache {
689 path: path.into(),
690 set: persistent_set::Set::read(&path, Self::CONTEXT)?,
691 })
692 }
693
694 fn key(&self, policy: &[u8], commit: Oid) -> Result<persistent_set::Value> {
695 let mut digest = HashAlgorithm::SHA512.context()?;
696 digest.update(policy);
697 digest.update(commit.as_bytes());
698
699 let mut key = [0; 32];
700 digest.digest(&mut key)?;
701
702 Ok(key)
703 }
704
705 /// Returns whether (policy, commit id) is in the cache.
706 ///
707 /// If (policy, commit id) is in the cache, then it was previously
708 /// determined that the policy authenticated the commit.
709 pub fn contains(&mut self, policy: &[u8], commit: Oid) -> Result<bool> {
710 Ok(self.set.contains(&self.key(policy, commit)?)?)
711 }
712
713 /// Add (policy, commit id) to the cache.
714 ///
715 /// If (policy, commit id) is in the cache, this means that the
716 /// policy considers the commit to be authenticated. Normally,
717 /// the policy comes from the parent commit, but it may be a
718 /// shadow policy.
719 pub fn insert(&mut self, policy: &[u8], commit: Oid) -> Result<()> {
720 self.set.insert(self.key(policy, commit)?);
721 Ok(())
722 }
723
724 pub fn persist(&mut self) -> Result<()> {
725 self.set.write(&self.path)?;
726 Ok(())
727 }
728}