sequoia_git/verify.rs
1//! Commit-tree traversal and verification.
2
3use std::{
4 path::{Path, PathBuf},
5 collections::{
6 BTreeMap,
7 BTreeSet,
8 },
9};
10
11use anyhow::{anyhow, Context, Result};
12use git2::{
13 Repository,
14 Oid,
15};
16
17use sequoia_openpgp::{
18 self as openpgp,
19 Cert,
20 cert::{
21 amalgamation::ValidAmalgamation,
22 CertParser,
23 },
24 Fingerprint,
25 packet::{Signature, UserID},
26 parse::Parse,
27 policy::StandardPolicy,
28 types::{
29 HashAlgorithm,
30 RevocationStatus,
31 RevocationType,
32 },
33};
34
35use crate::{
36 Policy,
37 persistent_set,
38};
39
40/// Whether to trace execution by default (on stderr).
41const TRACE: bool = false;
42
43#[derive(Default, Debug)]
44pub struct VerificationResult {
45 pub signer_keys: BTreeSet<openpgp::Fingerprint>,
46 pub primary_uids: BTreeSet<UserID>,
47}
48
49pub fn verify(git: &Repository,
50 trust_root: Oid,
51 shadow_policy: Option<&[u8]>,
52 commit_range: (Oid, Oid),
53 results: &mut VerificationResult,
54 keep_going: bool,
55 mut verify_cb: impl FnMut(&Oid, Option<&Oid>, &crate::Result<Vec<crate::Result<(String, Signature, Cert, Fingerprint)>>>) -> crate::Result<()>,
56 cache: &mut VerificationCache)
57 -> Result<()> {
58 tracer!(TRACE, "verify");
59 t!("verify(_, {}, {}..{})", trust_root, commit_range.0, commit_range.1);
60
61 if shadow_policy.is_some() {
62 t!("Using a shadow policy to verify commits.");
63 } else {
64 t!("Using in-band policies to verify commits.");
65 }
66
67 // XXX: These should be passed in as arguments.
68 let p: &StandardPolicy = &StandardPolicy::new();
69 let now = std::time::SystemTime::now();
70
71 // STRATEGY
72 //
73 // We want to determine if we can authenticate the target commit
74 // starting from the trust root. A simple approach is to try each
75 // possible path. Unfortunately, this is exponential in the
76 // number of merges. Consider a project where development is done
77 // on feature branches, and then merged using a merge commit. The
78 // commit graph will look like:
79 //
80 // ```
81 // o <- Merge commit
82 // | \
83 // | o <- Feature branch
84 // | /
85 // o <- Merge commit
86 // | \
87 // | o <- Feature branch
88 // | /
89 // o <- Merge commit
90 // | \
91 // ...
92 // ```
93 //
94 // After 100 such merges, there are 2**100 different paths.
95 // That's intractable.
96 //
97 // Happily, we can do better. We can determine if there is an
98 // authenticated path as a byproduct of a topological walk. This
99 // approach limits both the work we have to do, and the space we
100 // require to O(N), where N is the number of commits preceding the
101 // target commit in the commit graph. (If the trust root is a
102 // dominator, which is usually the case, then N is the number of
103 // commits that precede target, and follow the trust root, which
104 // is often very small.)
105 //
106 // Recall the following facts:
107 //
108 // - A commit is authenticated if it is the trust root, or at
109 // least one parent's policy authenticates the commit.
110 // - Unless the signer's certificate is hard revoked.
111 // - Unless there is an authenticated suffix that
112 // immediately follows the commit, and a commit on that
113 // authenticated suffix goodlists commit.
114 //
115 // In order to check the last condition, we need to know the
116 // goodlists of all the following, authenticated commits when we
117 // process the commit. When we do a topological walk, we know
118 // that when we visit a commit, we've already visited all of its
119 // ancestors. This means that we can easily collect the goodlists
120 // of all of the commits on authenticated paths from the commit to
121 // the target during the walk by propagating good lists from
122 // authenticated commits to their parents. Then, when we are
123 // examining a commit, and we discover that it has been revoked,
124 // we can immediately check if it is on a relevant goodlist.
125 //
126 // To do the topological walk, we first have to do a bit of
127 // preparation. Since we don't know a commit's children by
128 // looking at the commit (and not all children lead to the
129 // target), we have to extract that information from the graph.
130 // We do this by visiting all commits on paths from the target to
131 // the trust root, which we can do in O(N) space and time by doing
132 // a breath first search. When we visit a commit, we increment
133 // the number of children each of its parents has. We also use
134 // this opportunity to discover any certificates that have been
135 // hard revoked.
136 //
137 // Then we do the topological walk. For each commit, we consider
138 // the number of unprocessed children to be the number of recorded
139 // children. We then visit commits that have no unprocessed
140 // children.
141 //
142 // When we visit a commit, and there is an authenticated path from
143 // it to the target, we try to authenticate the commit. That is,
144 // for each parent, we check if the parent can authenticate the
145 // commit. If a parent authenticates the commit, but the signer's
146 // certificate has been revoked, we can immediately check whether
147 // the commit is on a goodlist. For each parent that
148 // authenticated the commit, we merge the commit's *aggregate*
149 // goodlist into the parent's goodlist. Finally, for each parent,
150 // we decrement the number of unprocessed children. If there are
151 // no unprocessed children left, it is added to a pending queue,
152 // so that it can be processed.
153
154 // Return the policy associated with the commit.
155 let read_policy = |commit: &Oid| -> Result<Vec<u8>> {
156 if let Some(p) = &shadow_policy {
157 Ok(p.to_vec())
158 } else {
159 Ok(Policy::read_bytes_from_commit(git, commit)?)
160 }
161 };
162
163 // Whether we've seen the policy file before. The key is the
164 // SHA512 digest of the policy file.
165 let mut policy_files: BTreeSet<Vec<u8>> = Default::default();
166 // Any hard revoked certificates.
167 let mut hard_revoked: BTreeSet<Fingerprint> = Default::default();
168
169 // Scans the specified commit's policy for hard revocations, and
170 // adds them to hard_revoked.
171 let mut scan_policy = |commit_id| -> Result<()> {
172 let policy_bytes = read_policy(&commit_id)?;
173 let policy_hash = sha512sum(&policy_bytes)?;
174 if policy_files.contains(&policy_hash) {
175 t!("Already scanned an identical copy of {}'s policy, skipping.",
176 commit_id);
177 return Ok(());
178 }
179 t!("Scanning {}'s policy for hard revocations", commit_id);
180
181 let policy = Policy::parse_bytes(&policy_bytes)?;
182 policy_files.insert(policy_hash);
183
184 // Scan for revoked certificates.
185 for authorization in policy.authorization().values() {
186 for cert in CertParser::from_bytes(&authorization.keyring)? {
187 let cert = if let Ok(cert) = cert {
188 cert
189 } else {
190 continue;
191 };
192
193 let vc = if let Ok(vc) = cert.with_policy(p, Some(now)) {
194 vc
195 } else {
196 continue;
197 };
198
199 let is_hard_revoked = |rs| {
200 if let RevocationStatus::Revoked(revs) = rs {
201 revs.iter().any(|rev| {
202 if let Some((reason, _)) = rev.reason_for_revocation() {
203 reason.revocation_type() == RevocationType::Hard
204 } else {
205 true
206 }
207 })
208 } else {
209 false
210 }
211 };
212
213 // Check if the certificate is hard revoked.
214 if is_hard_revoked(vc.revocation_status()) {
215 t!("Certificate {} is hard revoked, bad listing",
216 cert.fingerprint());
217 hard_revoked.insert(vc.fingerprint());
218 for k in vc.keys().subkeys().for_signing() {
219 hard_revoked.insert(k.key().fingerprint());
220 t!(" Badlisting signing key {}",
221 k.key().fingerprint());
222 }
223
224 continue;
225 }
226
227 // Check if any of the signing keys are hard revoked.
228 for k in vc.keys().subkeys().for_signing() {
229 if is_hard_revoked(k.revocation_status()) {
230 hard_revoked.insert(k.key().fingerprint());
231 t!(" Signing key {} hard revoked, bad listing",
232 k.key().fingerprint());
233 }
234 }
235 }
236 }
237
238 Ok(())
239 };
240
241 let middle = if trust_root == commit_range.0 {
242 None
243 } else {
244 Some(commit_range.0)
245 };
246
247 struct Commit {
248 // Number of children (commits derived from this one).
249 children: usize,
250
251 // Whether there is an authenticated path from this node to
252 // the target.
253 authenticated_suffix: bool,
254
255 // Whether we still need to go via MIDDLE.
256 traversed_middle: bool,
257 }
258
259 impl Default for Commit {
260 fn default() -> Self {
261 Commit {
262 children: 0,
263 authenticated_suffix: false,
264 traversed_middle: false,
265 }
266 }
267 }
268
269 let mut commits: BTreeMap<Oid, Commit> = Default::default();
270 if trust_root != commit_range.1 {
271 commits.insert(
272 commit_range.1.clone(),
273 Commit {
274 children: 0,
275 authenticated_suffix: true,
276 traversed_middle: middle.is_none(),
277 });
278 }
279
280 // We walk the tree from the target to the trust root (using a
281 // breath first search, but it doesn't matter), and fill in
282 // COMMITS.CHILDREN.
283 {
284 // Commits that we haven't processed yet.
285 let mut pending: BTreeSet<Oid> = Default::default();
286 pending.insert(commit_range.1.clone());
287
288 // Commits that we've processed.
289 let mut processed: BTreeSet<Oid> = Default::default();
290
291 while let Some(commit_id) = pending.pop_first() {
292 processed.insert(commit_id);
293
294 let commit = git.find_commit(commit_id)?;
295
296 // Don't fail if we can't parse the policy file.
297 let _ = scan_policy(commit_id);
298
299 if commit_id == trust_root {
300 // This is the trust root. There is no need to go
301 // further.
302 continue;
303 }
304
305 for parent in commit.parents() {
306 let parent_id = parent.id();
307
308 let info = commits.entry(parent_id).or_default();
309 info.children += 1;
310
311 if ! processed.contains(&parent_id)
312 && ! pending.contains(&parent_id)
313 {
314 pending.insert(parent_id);
315 }
316 }
317 }
318 }
319
320 // The union of the commit goodlists of commits on an
321 // authenticated suffix (not including this commit). We build
322 // this up as we authenticate commits. Since we do a topological
323 // walk, this will be complete for a given commit when we process
324 // that commit.
325 let mut descendant_goodlist: BTreeMap<Oid, BTreeSet<String>>
326 = Default::default();
327
328 let mut errors = Vec::new();
329 let mut unauthenticated_commits: BTreeSet<Oid> = Default::default();
330 let mut authenticated_commits: BTreeSet<Oid> = Default::default();
331
332 // Authenticate the commit using the specified parent.
333 //
334 // NOTE: This must only be called on a commit, if the commit is on
335 // an authenticated suffix!!!
336 let mut authenticate_commit = |commit_id, parent_id| -> Result<bool> {
337 let parent_policy = read_policy(&parent_id)?;
338 let parent_id = if commit_id == parent_id {
339 // This is only the case when verifying the trust root
340 // using the shadow policy.
341 assert_eq!(commit_id, trust_root);
342 assert!(shadow_policy.is_some());
343 None
344 } else {
345 Some(&parent_id)
346 };
347
348 // The current commit's good list.
349 let mut commit_goodlist = BTreeSet::new();
350
351 // XXX: If we have some certificates that are hard revoked,
352 // then we can't use the cache. This is because the cache
353 // doesn't tell us what certificate was used to sign the
354 // commit, which means we can't figure out if the signer's
355 // certificate was revoked when the result is cached.
356 let (vresult, cache_hit) = if hard_revoked.is_empty()
357 && cache.contains(&parent_policy, commit_id)?
358 {
359 (Ok(vec![]), true)
360 } else {
361 let parent_policy = Policy::parse_bytes(&parent_policy)?;
362 let commit_policy = Policy::parse_bytes(read_policy(&commit_id)?)?;
363
364 commit_goodlist = commit_policy.commit_goodlist().clone();
365
366 (parent_policy.verify(git, &commit_id, &commit_policy,
367 &mut results.signer_keys,
368 &mut results.primary_uids),
369 false)
370 };
371
372 if let Err(err) = verify_cb(&commit_id, parent_id, &vresult) {
373 t!("verify_cb -> {}", err);
374 return Err(err.into());
375 }
376
377 if cache_hit {
378 // XXX: communicate this to the caller
379 // instead of eprintln.
380 let id = if let Some(parent_id) = parent_id {
381 format!("{}..{}", parent_id, commit_id)
382 } else {
383 commit_id.to_string()
384 };
385 eprintln!("{}:\n Cached positive verification", id);
386 }
387
388 match vresult {
389 Ok(results) => {
390 // Whether the parent authenticated the commit.
391 let mut good = false;
392 // Whether the commit was goodlisted by a later
393 // commit.
394 let mut goodlisted = false;
395
396 // Whether the commit was goodlisted by the parent's
397 // policy. (Because commits form a Merkle tree, this
398 // is only possible when we are using a shadow
399 // policy.)
400 if ! cache_hit && results.is_empty() {
401 // XXX: communicate this to the caller
402 // instead of eprintln.
403 eprintln!("{}: Explicitly goodlisted", commit_id);
404 good = true;
405 }
406
407 for r in results {
408 match r {
409 Ok((_, _sig, cert, signer_fpr)) => {
410 // It looks good, but make sure the
411 // certificate was not revoked.
412 if hard_revoked.contains(&signer_fpr) {
413 t!("Cert {}{} used to sign {} is revoked.",
414 cert.fingerprint(),
415 if cert.fingerprint() != signer_fpr {
416 format!(", key {}", signer_fpr)
417 } else {
418 "".to_string()
419 },
420 commit_id);
421
422 // It was revoked, but perhaps the
423 // commit was goodlisted.
424 if descendant_goodlist.get(&commit_id)
425 .map(|goodlist| {
426 t!(" Goodlist contains: {}",
427 goodlist
428 .iter().cloned()
429 .collect::<Vec<String>>()
430 .join(", "));
431 goodlist.contains(&commit_id.to_string())
432 })
433 .unwrap_or(false)
434 {
435 t!("But the commit was goodlisted, \
436 so all is good.");
437 goodlisted = true;
438 }
439 } else {
440 t!("{} has a good signature from {}",
441 commit_id, cert.fingerprint());
442 good = true;
443 }
444 }
445 Err(e) => errors.push(
446 anyhow::Error::from(e).context(
447 format!("While verifying commit {}",
448 commit_id))),
449 }
450 }
451
452 // We do NOT insert into the cache if the commit was
453 // goodlisted. The cache is a function of the parent
454 // policy and the children policy; goodlisting is a
455 // function of commit range.
456 if ! cache_hit && good && ! goodlisted {
457 cache.insert(&parent_policy, commit_id)?;
458 }
459
460 if cache_hit || good || goodlisted {
461 // Merge the commit's goodlist into the parent's
462 // goodlist.
463 if let Some(descendant_goodlist)
464 = descendant_goodlist.get(&commit_id)
465 {
466 commit_goodlist.extend(descendant_goodlist.iter().cloned());
467 };
468
469 if let Some(parent_id) = parent_id {
470 if let Some(p_goodlist)
471 = descendant_goodlist.get_mut(&parent_id)
472 {
473 p_goodlist.extend(commit_goodlist.into_iter());
474 } else if ! commit_goodlist.is_empty() {
475 descendant_goodlist.insert(
476 parent_id.clone(), commit_goodlist);
477 }
478 }
479 }
480
481 let authenticated = cache_hit || good || goodlisted;
482 if authenticated {
483 authenticated_commits.insert(commit_id);
484 } else {
485 unauthenticated_commits.insert(commit_id);
486 }
487 Ok(authenticated)
488 },
489 Err(e) => {
490 unauthenticated_commits.insert(commit_id);
491 errors.push(anyhow::Error::from(e).context(
492 format!("While verifying commit {}", commit_id)));
493 Ok(false)
494 },
495 }
496 };
497
498 // We now do a topological walk from the target to the trust root.
499
500 // Assume there is no path until we prove otherwise. (A
501 // zero-length path is already valid.)
502 let mut valid_path = trust_root == commit_range.0
503 && commit_range.0 == commit_range.1;
504
505 // Commits that we haven't processed yet.
506 let mut pending: BTreeSet<Oid> = Default::default();
507 if trust_root != commit_range.1 {
508 pending.insert(commit_range.1.clone());
509 }
510
511 'authentication: while let Some(commit_id) = pending.pop_first() {
512 let commit = git.find_commit(commit_id)?;
513
514 t!("Processing {}: {}", commit_id, commit.summary().unwrap_or(""));
515
516 let commit_info = commits.get(&commit_id).expect("added");
517 assert_eq!(commit_info.children, 0);
518 let authenticated_suffix = commit_info.authenticated_suffix;
519 let traversed_middle = commit_info.traversed_middle;
520
521 for (parent_i, parent) in commit.parents().enumerate() {
522 let parent_id = parent.id();
523 t!("Considering {} -> {} (parent #{})",
524 commit_id, parent_id, parent_i);
525
526 let parent_info = commits.get_mut(&parent_id).expect("added");
527 t!(" Parent has {} unprocessed children", parent_info.children);
528 assert!(parent_info.children > 0);
529 parent_info.children -= 1;
530
531 if authenticated_suffix {
532 t!(" Child IS on an authenticated suffix");
533 } else {
534 t!(" Child IS NOT on an authenticated suffix.");
535 }
536
537 let authenticated = if keep_going || authenticated_suffix {
538 authenticate_commit(commit_id, parent_id)
539 .with_context(|| {
540 format!("Authenticating {} with {}",
541 commit_id, parent_id)
542 })?
543 } else {
544 false
545 };
546
547 if authenticated_suffix && authenticated {
548 t!(" Parent authenticates child");
549 parent_info.authenticated_suffix = true;
550
551 if traversed_middle {
552 parent_info.traversed_middle = true;
553 } else if middle == Some(commit_id) {
554 t!(" Traversed {} on way to trust root.", commit_id);
555 parent_info.traversed_middle = true;
556 }
557
558 if parent_id == trust_root {
559 t!(" Parent is the trust root.");
560
561 // This is the trust root. There is no need to go
562 // further.
563 if ! parent_info.traversed_middle {
564 t!(" but path was not via {}", middle.unwrap());
565 } else {
566 valid_path = true;
567 }
568 if ! keep_going {
569 break 'authentication;
570 }
571 }
572 }
573
574 if parent_info.children == 0 {
575 t!(" No other unprocessed children.");
576 if parent_id == trust_root {
577 t!(" Parent is the start of the commit range, \
578 so it doesn't need to be processed.");
579 } else if ! keep_going && ! parent_info.authenticated_suffix {
580 t!(" Parent does not authenticate any child, \
581 so it doesn't need to be processed.");
582 } else {
583 t!(" Adding parent to pending.");
584 pending.insert(parent_id);
585 }
586 }
587 }
588 }
589
590 // When using a shadow policy, we also authenticate the trust
591 // root with it.
592 if (keep_going || valid_path) && shadow_policy.is_some() {
593 t!("Verifying trust root ({}) using the shadow policy",
594 trust_root);
595 // We verify the trust root using itself? Not quite. We know
596 // that authenticate_commit prefers the shadow policy, and we
597 // know that a shadow policy is set. So this will actually
598 // check that the shadow policy verifies the trust root.
599 if ! authenticate_commit(trust_root, trust_root)? {
600 valid_path = false;
601
602 if let Some(e) = errors.pop() {
603 errors.push(
604 e.context(format!("Could not verify trust root {} \
605 using the specified policy",
606 trust_root)));
607 }
608 }
609 }
610
611 if valid_path {
612 if trust_root == commit_range.0 {
613 eprintln!(
614 "Verified that there is an authenticated path from the trust root\n\
615 {} to {}.",
616 trust_root, commit_range.1);
617 } else {
618 eprintln!(
619 "Verified that there is an authenticated path from the trust root\n\
620 {} via {}\n\
621 to {}.",
622 trust_root, commit_range.0, commit_range.1);
623 }
624 Ok(())
625 } else {
626 if errors.is_empty() {
627 Err(anyhow!("Could not verify commits {}..{}{}",
628 trust_root,
629 if let Some(middle) = middle {
630 format!("{}..", middle)
631 } else {
632 "".to_string()
633 },
634 commit_range.1))
635 } else {
636 let mut e = errors.swap_remove(0)
637 .context(format!("Could not verify commits {}..{}{}",
638 commit_range.0,
639 if let Some(middle) = middle {
640 format!("{}..", middle)
641 } else {
642 "".to_string()
643 },
644 commit_range.1));
645 if ! errors.is_empty() {
646 e = e.context(
647 format!("{} errors occurred while verifying the commits. \
648 {} commits couldn't be authenticated. \
649 Note: not all errors are fatal. \
650 The first error is shown:",
651 errors.len() + 1,
652 unauthenticated_commits.difference(
653 &authenticated_commits).count()));
654 }
655 Err(e)
656 }
657 }
658}
659
660// Returns the SHA512 digest of the provided bytes.
661fn sha512sum(bytes: &[u8]) -> Result<Vec<u8>> {
662 let mut digest = HashAlgorithm::SHA512.context()?.for_digest();
663 digest.update(bytes);
664
665 let mut key = Vec::with_capacity(32);
666 digest.digest(&mut key)?;
667 Ok(key)
668}
669
670pub struct VerificationCache {
671 path: PathBuf,
672 set: persistent_set::Set,
673}
674
675impl VerificationCache {
676 const CONTEXT: &'static str = "SqGitVerify0";
677
678 pub fn new() -> Result<Self> {
679 let p = dirs::cache_dir().ok_or(anyhow::anyhow!("No cache dir"))?
680 .join("sq-git.verification.cache");
681 Self::open(p)
682 }
683
684 pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
685 let path = path.as_ref();
686
687 Ok(VerificationCache {
688 path: path.into(),
689 set: persistent_set::Set::read(&path, Self::CONTEXT)?,
690 })
691 }
692
693 fn key(&self, policy: &[u8], commit: Oid) -> Result<persistent_set::Value> {
694 let mut digest = HashAlgorithm::SHA512.context()?.for_digest();
695 digest.update(policy);
696 digest.update(commit.as_bytes());
697
698 let mut key = [0; 32];
699 digest.digest(&mut key)?;
700
701 Ok(key)
702 }
703
704 /// Returns whether (policy, commit id) is in the cache.
705 ///
706 /// If (policy, commit id) is in the cache, then it was previously
707 /// determined that the policy authenticated the commit.
708 pub fn contains(&mut self, policy: &[u8], commit: Oid) -> Result<bool> {
709 Ok(self.set.contains(&self.key(policy, commit)?)?)
710 }
711
712 /// Add (policy, commit id) to the cache.
713 ///
714 /// If (policy, commit id) is in the cache, this means that the
715 /// policy considers the commit to be authenticated. Normally,
716 /// the policy comes from the parent commit, but it may be a
717 /// shadow policy.
718 pub fn insert(&mut self, policy: &[u8], commit: Oid) -> Result<()> {
719 self.set.insert(self.key(policy, commit)?);
720 Ok(())
721 }
722
723 pub fn persist(&mut self) -> Result<()> {
724 self.set.write(&self.path)?;
725 Ok(())
726 }
727}