Skip to main content

sequoia_git/
verify.rs

1//! Commit-tree traversal and verification.
2
3use std::{
4    borrow::Cow,
5    path::{Path, PathBuf},
6    collections::{
7        BTreeMap,
8        BTreeSet,
9    },
10};
11
12use anyhow::{anyhow, Context, Result};
13use git2::{
14    Repository,
15    Oid,
16};
17
18use sequoia_openpgp::{
19    self as openpgp,
20    Cert,
21    cert::{
22        amalgamation::ValidAmalgamation,
23        CertParser,
24    },
25    Fingerprint,
26    packet::{Signature, UserID},
27    parse::Parse,
28    policy::StandardPolicy,
29    types::{
30        HashAlgorithm,
31        RevocationStatus,
32        RevocationType,
33    },
34};
35
36use crate::{
37    Error,
38    Policy,
39    persistent_set,
40};
41
42/// Whether to trace execution by default (on stderr).
43const TRACE: bool = false;
44
45#[derive(Default, Debug)]
46pub struct VerificationResult {
47    pub signer_keys: BTreeSet<openpgp::Fingerprint>,
48    pub primary_uids: BTreeSet<UserID>,
49}
50
51pub fn verify(git: &Repository,
52              trust_root: Oid,
53              shadow_policy: Option<&[u8]>,
54              commit_range: (Oid, Oid),
55              results: &mut VerificationResult,
56              keep_going: bool,
57              mut verify_cb: impl FnMut(&Oid, Option<&Oid>, &crate::Result<Vec<crate::Result<(String, Signature, Cert, Fingerprint)>>>) -> crate::Result<()>,
58              cache: &mut VerificationCache,
59              quiet: bool, verbose: bool)
60              -> Result<()>
61{
62    tracer!(TRACE, "verify");
63    t!("verify(_, {}, {}..{})", trust_root, commit_range.0, commit_range.1);
64
65    if shadow_policy.is_some() {
66        t!("Using a shadow policy to verify commits.");
67    } else {
68        t!("Using in-band policies to verify commits.");
69    }
70
71    // XXX: These should be passed in as arguments.
72    let p: &StandardPolicy = &StandardPolicy::new();
73    let now = std::time::SystemTime::now();
74
75    // STRATEGY
76    //
77    // We want to determine if we can authenticate the target commit
78    // starting from the trust root.  A simple approach is to try each
79    // possible path.  Unfortunately, this is exponential in the
80    // number of merges.  Consider a project where development is done
81    // on feature branches, and then merged using a merge commit.  The
82    // commit graph will look like:
83    //
84    // ```
85    //   o      <- Merge commit
86    //   | \
87    //   |  o   <- Feature branch
88    //   | /
89    //   o      <- Merge commit
90    //   | \
91    //   |  o   <- Feature branch
92    //   | /
93    //   o      <- Merge commit
94    //   | \
95    //   ...
96    // ```
97    //
98    // After 100 such merges, there are 2**100 different paths.
99    // That's intractable.
100    //
101    // Happily, we can do better.  We can determine if there is an
102    // authenticated path as a byproduct of a topological walk.  This
103    // approach limits both the work we have to do, and the space we
104    // require to O(N), where N is the number of commits preceding the
105    // target commit in the commit graph.  (If the trust root is a
106    // dominator, which is usually the case, then N is the number of
107    // commits that precede target, and follow the trust root, which
108    // is often very small.)
109    //
110    // Recall the following facts:
111    //
112    //   - A commit is authenticated if it is the trust root, or at
113    //     least one parent's policy authenticates the commit.
114    //     - Unless the signer's certificate is hard revoked.
115    //       - Unless there is an authenticated suffix that
116    //         immediately follows the commit, and a commit on that
117    //         authenticated suffix goodlists commit.
118    //
119    // In order to check the last condition, we need to know the
120    // goodlists of all the following, authenticated commits when we
121    // process the commit.  When we do a topological walk, we know
122    // that when we visit a commit, we've already visited all of its
123    // ancestors.  This means that we can easily collect the goodlists
124    // of all of the commits on authenticated paths from the commit to
125    // the target during the walk by propagating good lists from
126    // authenticated commits to their parents.  Then, when we are
127    // examining a commit, and we discover that it has been revoked,
128    // we can immediately check if it is on a relevant goodlist.
129    //
130    // To do the topological walk, we first have to do a bit of
131    // preparation.  Since we don't know a commit's children by
132    // looking at the commit (and not all children lead to the
133    // target), we have to extract that information from the graph.
134    // We do this by visiting all commits on paths from the target to
135    // the trust root, which we can do in O(N) space and time by doing
136    // a breath first search.  When we visit a commit, we increment
137    // the number of children each of its parents has.  We also use
138    // this opportunity to discover any certificates that have been
139    // hard revoked.
140    //
141    // Then we do the topological walk.  For each commit, we consider
142    // the number of unprocessed children to be the number of recorded
143    // children.  We then visit commits that have no unprocessed
144    // children.
145    //
146    // When we visit a commit, and there is an authenticated path from
147    // it to the target, we try to authenticate the commit.  That is,
148    // for each parent, we check if the parent can authenticate the
149    // commit.  If a parent authenticates the commit, but the signer's
150    // certificate has been revoked, we can immediately check whether
151    // the commit is on a goodlist.  For each parent that
152    // authenticated the commit, we merge the commit's *aggregate*
153    // goodlist into the parent's goodlist.  Finally, for each parent,
154    // we decrement the number of unprocessed children.  If there are
155    // no unprocessed children left, it is added to a pending queue,
156    // so that it can be processed.
157
158    // Return the policy associated with the commit.
159    let read_policy = |commit: &Oid| -> Result<Cow<[u8]>> {
160        if let Some(p) = &shadow_policy {
161            Ok(Cow::Borrowed(p))
162        } else {
163            match Policy::read_bytes_from_commit(git, commit) {
164                Ok(policy) => Ok(Cow::Owned(policy)),
165                Err(err) => {
166                    if let Error::MissingPolicy(_) = err {
167                        Ok(Cow::Borrowed(b""))
168                    } else {
169                        Err(err.into())
170                    }
171                }
172            }
173        }
174    };
175
176    // Whether we've seen the policy file before.  The key is the
177    // SHA512 digest of the policy file.
178    let mut policy_files: BTreeSet<Vec<u8>> = Default::default();
179    // Any hard revoked certificates.
180    let mut hard_revoked: BTreeSet<Fingerprint> = Default::default();
181
182    // Scans the specified commit's policy for hard revocations, and
183    // adds them to hard_revoked.
184    let mut scan_policy = |commit_id| -> Result<()> {
185        let policy_bytes = read_policy(&commit_id)?;
186        let policy_hash = sha512sum(&policy_bytes)?;
187        if policy_files.contains(&policy_hash) {
188            t!("Already scanned an identical copy of {}'s policy, skipping.",
189               commit_id);
190            return Ok(());
191        }
192        t!("Scanning {}'s policy for hard revocations", commit_id);
193
194        let policy = Policy::parse_bytes(&policy_bytes)?;
195        policy_files.insert(policy_hash);
196
197        // Scan for revoked certificates.
198        for authorization in policy.authorization().values() {
199            for cert in CertParser::from_bytes(&authorization.keyring)? {
200                let cert = if let Ok(cert) = cert {
201                    cert
202                } else {
203                    continue;
204                };
205
206                let vc = if let Ok(vc) = cert.with_policy(p, Some(now)) {
207                    vc
208                } else {
209                    continue;
210                };
211
212                let is_hard_revoked = |rs| {
213                    if let RevocationStatus::Revoked(revs) = rs {
214                        revs.iter().any(|rev| {
215                            if let Some((reason, _)) = rev.reason_for_revocation() {
216                                reason.revocation_type() == RevocationType::Hard
217                            } else {
218                                true
219                            }
220                        })
221                    } else {
222                        false
223                    }
224                };
225
226                // Check if the certificate is hard revoked.
227                if is_hard_revoked(vc.revocation_status()) {
228                    t!("Certificate {} is hard revoked, bad listing",
229                       cert.fingerprint());
230                    hard_revoked.insert(vc.fingerprint());
231                    for k in vc.keys().subkeys().for_signing() {
232                        hard_revoked.insert(k.key().fingerprint());
233                        t!("  Badlisting signing key {}",
234                           k.key().fingerprint());
235                    }
236
237                    continue;
238                }
239
240                // Check if any of the signing keys are hard revoked.
241                for k in vc.keys().subkeys().for_signing() {
242                    if is_hard_revoked(k.revocation_status()) {
243                        hard_revoked.insert(k.key().fingerprint());
244                        t!("  Signing key {} hard revoked, bad listing",
245                           k.key().fingerprint());
246                    }
247                }
248            }
249        }
250
251        Ok(())
252    };
253
254    let middle = if trust_root == commit_range.0 {
255        None
256    } else {
257        Some(commit_range.0)
258    };
259
260    struct Commit {
261        // Number of unprocessed children (commits following this
262        // one).
263        unprocessed_children: usize,
264
265        // Whether there is an authenticated path from this node to
266        // the target.
267        authenticated_suffix: bool,
268
269        // Whether we still need to go via MIDDLE.
270        traversed_middle: bool,
271    }
272
273    impl Default for Commit {
274        fn default() -> Self {
275            Commit {
276                unprocessed_children: 0,
277                authenticated_suffix: false,
278                traversed_middle: false,
279            }
280        }
281    }
282
283    let mut commits: BTreeMap<Oid, Commit> = Default::default();
284    if trust_root != commit_range.1 {
285        commits.insert(
286            commit_range.1.clone(),
287            Commit {
288                unprocessed_children: 0,
289                authenticated_suffix: true,
290                traversed_middle: middle.is_none(),
291            });
292    }
293
294    // We walk the tree from the target to the trust root (using a
295    // breath first search, but it doesn't matter), and fill in
296    // COMMITS.
297    {
298        // Commits that we haven't processed yet.
299        let mut pending: BTreeSet<Oid> = Default::default();
300        pending.insert(commit_range.1.clone());
301
302        // Commits that we've processed.
303        let mut processed: BTreeSet<Oid> = Default::default();
304
305        while let Some(commit_id) = pending.pop_first() {
306            processed.insert(commit_id);
307
308            let commit = git.find_commit(commit_id)?;
309
310            // Don't fail if we can't parse the policy file.
311            let _ = scan_policy(commit_id);
312
313            if commit_id == trust_root {
314                // This is the trust root.  There is no need to go
315                // further.
316                continue;
317            }
318
319            for parent in commit.parents() {
320                let parent_id = parent.id();
321
322                // Add an entry for the parent commit (if we haven't
323                // yet done so).
324                let info = commits.entry(parent_id).or_default();
325                info.unprocessed_children += 1;
326
327                if ! processed.contains(&parent_id)
328                    && ! pending.contains(&parent_id)
329                {
330                    pending.insert(parent_id);
331                }
332            }
333        }
334    }
335
336    // The union of the commit goodlists of commits on an
337    // authenticated suffix (not including this commit).  We build
338    // this up as we authenticate commits.  Since we do a topological
339    // walk, this will be complete for a given commit when we process
340    // that commit.
341    let mut descendant_goodlist: BTreeMap<Oid, BTreeSet<String>>
342        = Default::default();
343
344    let mut errors = Vec::new();
345    let mut unauthenticated_commits: BTreeSet<Oid> = Default::default();
346    let mut authenticated_commits: BTreeSet<Oid> = Default::default();
347
348    // Authenticate the commit using the specified parent.
349    //
350    // NOTE: This must only be called on a commit, if the commit is on
351    // an authenticated suffix!!!
352    let mut authenticate_commit = |commit_id, parent_id| -> Result<bool> {
353        let parent_policy = read_policy(&parent_id)?;
354        let parent_id = if commit_id == parent_id {
355            // This is only the case when verifying the trust root
356            // using the shadow policy.
357            assert_eq!(commit_id, trust_root);
358            assert!(shadow_policy.is_some());
359            None
360        } else {
361            Some(&parent_id)
362        };
363
364        // The current commit's good list.
365        let mut commit_goodlist = BTreeSet::new();
366
367        // XXX: If we have some certificates that are hard revoked,
368        // then we can't use the cache.  This is because the cache
369        // doesn't tell us what certificate was used to sign the
370        // commit, which means we can't figure out if the signer's
371        // certificate was revoked when the result is cached.
372        let (vresult, cache_hit) = if hard_revoked.is_empty()
373            && cache.contains(&parent_policy, commit_id)?
374        {
375            (Ok(vec![]), true)
376        } else {
377            let parent_policy = Policy::parse_bytes(&parent_policy)?;
378            let commit_policy = Policy::parse_bytes(read_policy(&commit_id)?)?;
379
380            commit_goodlist = commit_policy.commit_goodlist().clone();
381
382            (parent_policy.verify(git, &commit_id, &commit_policy,
383                                  &mut results.signer_keys,
384                                  &mut results.primary_uids),
385             false)
386        };
387
388        if let Err(err) = verify_cb(&commit_id, parent_id, &vresult) {
389            t!("verify_cb -> {}", err);
390            return Err(err.into());
391        }
392
393        if cache_hit {
394            // XXX: communicate this to the caller
395            // instead of println.
396            let id = if let Some(parent_id) = parent_id {
397                format!("{}..{}", parent_id, commit_id)
398            } else {
399                commit_id.to_string()
400            };
401            if verbose {
402                println!("{}:\n  Cached positive verification", id);
403            }
404        }
405
406        match vresult {
407            Ok(results) => {
408                // Whether the parent authenticated the commit.
409                let mut good = false;
410                // Whether the commit was goodlisted by a later
411                // commit.
412                let mut goodlisted = false;
413
414                // Whether the commit was goodlisted by the parent's
415                // policy.  (Because commits form a Merkle tree, this
416                // is only possible when we are using a shadow
417                // policy.)
418                if ! cache_hit && results.is_empty() {
419                    // XXX: communicate this to the caller
420                    // instead of eprintln.
421                    if verbose {
422                        println!("{}: Explicitly goodlisted", commit_id);
423                    }
424                    good = true;
425                }
426
427                for r in results {
428                    match r {
429                        Ok((_, _sig, cert, signer_fpr)) => {
430                            // It looks good, but make sure the
431                            // certificate was not revoked.
432                            if hard_revoked.contains(&signer_fpr) {
433                                t!("Cert {}{} used to sign {} is revoked.",
434                                   cert.fingerprint(),
435                                   if cert.fingerprint() != signer_fpr {
436                                       format!(", key {}", signer_fpr)
437                                   } else {
438                                       "".to_string()
439                                   },
440                                   commit_id);
441
442                                // It was revoked, but perhaps the
443                                // commit was goodlisted.
444                                if descendant_goodlist.get(&commit_id)
445                                    .map(|goodlist| {
446                                        t!("  Goodlist contains: {}",
447                                           goodlist
448                                               .iter().cloned()
449                                               .collect::<Vec<String>>()
450                                               .join(", "));
451                                        goodlist.contains(&commit_id.to_string())
452                                    })
453                                    .unwrap_or(false)
454                                {
455                                    t!("But the commit was goodlisted, \
456                                        so all is good.");
457                                    goodlisted = true;
458                                }
459                            } else {
460                                t!("{} has a good signature from {}",
461                                   commit_id, cert.fingerprint());
462                                good = true;
463                            }
464                        }
465                        Err(e) => errors.push(
466                            anyhow::Error::from(e).context(
467                                format!("While verifying commit {}",
468                                        commit_id))),
469                    }
470                }
471
472                // We do NOT insert into the cache if the commit was
473                // goodlisted.  The cache is a function of the parent
474                // policy and the children policy; goodlisting is a
475                // function of commit range.
476                if ! cache_hit && good && ! goodlisted {
477                    cache.insert(&parent_policy, commit_id)?;
478                }
479
480                if cache_hit || good || goodlisted {
481                    // Merge the commit's goodlist into the parent's
482                    // goodlist.
483                    if let Some(descendant_goodlist)
484                        = descendant_goodlist.get(&commit_id)
485                    {
486                        commit_goodlist.extend(descendant_goodlist.iter().cloned());
487                    };
488
489                    if let Some(parent_id) = parent_id {
490                        if let Some(p_goodlist)
491                            = descendant_goodlist.get_mut(&parent_id)
492                        {
493                            p_goodlist.extend(commit_goodlist.into_iter());
494                        } else if ! commit_goodlist.is_empty() {
495                            descendant_goodlist.insert(
496                                parent_id.clone(), commit_goodlist);
497                        }
498                    }
499                }
500
501                let authenticated = cache_hit || good || goodlisted;
502                if authenticated {
503                    authenticated_commits.insert(commit_id);
504                } else {
505                    unauthenticated_commits.insert(commit_id);
506                }
507                Ok(authenticated)
508            },
509            Err(e) => {
510                unauthenticated_commits.insert(commit_id);
511                errors.push(anyhow::Error::from(e).context(
512                    format!("While verifying commit {}", commit_id)));
513                Ok(false)
514            },
515        }
516    };
517
518    // We now do a topological walk from the target to the trust root.
519
520    // Assume there is no path until we prove otherwise.  (A
521    // zero-length path is already valid.)
522    let mut valid_path = trust_root == commit_range.0
523        && commit_range.0 == commit_range.1;
524
525    // Commits that we haven't processed yet and have no unprocessed
526    // children.
527    let mut pending: BTreeSet<Oid> = Default::default();
528    if trust_root != commit_range.1 {
529        pending.insert(commit_range.1.clone());
530    }
531
532    'authentication: while let Some(commit_id) = pending.pop_first() {
533        let commit = git.find_commit(commit_id)?;
534
535        t!("Processing {}: {}", commit_id, commit.summary().unwrap_or(""));
536
537        let commit_info = commits.get(&commit_id).expect("added");
538        assert_eq!(commit_info.unprocessed_children, 0);
539        let authenticated_suffix = commit_info.authenticated_suffix;
540        let traversed_middle = commit_info.traversed_middle;
541
542        for (parent_i, parent) in commit.parents().enumerate() {
543            let parent_id = parent.id();
544            t!("Considering {} -> {} (parent #{} of {})",
545               commit_id, parent_id, parent_i + 1, commit.parents().len());
546
547            let parent_is_trust_root = parent_id == trust_root;
548
549            let parent_info = commits.get_mut(&parent_id)
550                .with_context(|| format!("Looking up {}", parent_id))
551                .expect("added");
552            t!("  Parent has {} unprocessed children",
553               parent_info.unprocessed_children);
554            assert!(parent_info.unprocessed_children > 0);
555            parent_info.unprocessed_children -= 1;
556            if parent_info.unprocessed_children == 0 && ! parent_is_trust_root {
557                t!("  Adding parent to pending queue for processing");
558                pending.insert(parent_id);
559            }
560
561            if authenticated_suffix {
562                t!("  Child IS on an authenticated suffix");
563            } else {
564                t!("  Child IS NOT on an authenticated suffix.");
565            }
566
567            let authenticated = if keep_going || authenticated_suffix {
568                authenticate_commit(commit_id, parent_id)
569                    .with_context(|| {
570                        format!("Authenticating {} with {}",
571                                commit_id, parent_id)
572                    })?
573            } else {
574                false
575            };
576            t!("  Could {}authenticate commit.",
577               if authenticated { "" } else { "not " });
578
579            if authenticated_suffix && authenticated {
580                t!("  Parent authenticates child");
581                if ! parent_info.authenticated_suffix {
582                    t!("  Parent is now part of an authenticated suffix.");
583                }
584                parent_info.authenticated_suffix = true;
585
586                if traversed_middle {
587                    parent_info.traversed_middle = true;
588                } else if middle == Some(commit_id) {
589                    t!("  Traversed {} on way to trust root.", commit_id);
590                    parent_info.traversed_middle = true;
591                }
592
593                if parent_is_trust_root {
594                    t!("  Parent is the trust root.");
595
596                    if ! parent_info.traversed_middle {
597                        t!("  but path was not via {}", middle.unwrap());
598                    } else {
599                        // This is the trust root and we traversed the
600                        // middle commit.  There is no need to go
601                        // further.
602                        valid_path = true;
603                        if ! keep_going {
604                            break 'authentication;
605                        }
606                    }
607                }
608            }
609        }
610    }
611
612    t!("No commits pending.");
613
614    // When using a shadow policy, we also authenticate the trust
615    // root with it.
616    if (keep_going || valid_path) && shadow_policy.is_some() {
617        t!("Verifying trust root ({}) using the shadow policy",
618           trust_root);
619        // We verify the trust root using itself?  Not quite.  We know
620        // that authenticate_commit prefers the shadow policy, and we
621        // know that a shadow policy is set.  So this will actually
622        // check that the shadow policy verifies the trust root.
623        if ! authenticate_commit(trust_root, trust_root)? {
624            valid_path = false;
625
626            if let Some(e) = errors.pop() {
627                errors.push(
628                    e.context(format!("Could not verify trust root {} \
629                                       using the specified policy",
630                                      trust_root)));
631            }
632        }
633    }
634
635    if valid_path {
636        if trust_root == commit_range.0 {
637            if ! quiet {
638                println!(
639                    "Verified that there is an authenticated path from the trust root\n\
640                     {} to {}.",
641                    trust_root, commit_range.1);
642            }
643        } else {
644            if ! quiet {
645                println!(
646                    "Verified that there is an authenticated path from the trust root\n\
647                     {} via {}\n\
648                     to {}.",
649                    trust_root, commit_range.0, commit_range.1);
650            }
651        }
652        Ok(())
653    } else {
654        if errors.is_empty() {
655            Err(anyhow!("Could not verify commits {}..{}{}",
656                        trust_root,
657                        if let Some(middle) = middle {
658                            format!("{}..", middle)
659                        } else {
660                            "".to_string()
661                        },
662                        commit_range.1))
663        } else {
664            let mut e = errors.swap_remove(0)
665                .context(format!("Could not verify commits {}..{}{}",
666                                 commit_range.0,
667                                 if let Some(middle) = middle {
668                                     format!("{}..", middle)
669                                 } else {
670                                     "".to_string()
671                                 },
672                                 commit_range.1));
673            if ! errors.is_empty() {
674                e = e.context(
675                    format!("{} errors occurred while verifying the commits.  \
676                             {} commits couldn't be authenticated.  \
677                             Note: not all errors are fatal.  \
678                             The first error is shown:",
679                            errors.len() + 1,
680                            unauthenticated_commits.difference(
681                                &authenticated_commits).count()));
682            }
683            Err(e)
684        }
685    }
686}
687
688// Returns the SHA512 digest of the provided bytes.
689fn sha512sum(bytes: &[u8]) -> Result<Vec<u8>> {
690    let mut digest = HashAlgorithm::SHA512.context()?.for_digest();
691    digest.update(bytes);
692
693    let mut key = Vec::with_capacity(32);
694    digest.digest(&mut key)?;
695    Ok(key)
696}
697
698pub struct VerificationCache {
699    path: PathBuf,
700    set: persistent_set::Set,
701}
702
703impl VerificationCache {
704    const CONTEXT: &'static str = "SqGitVerify0";
705
706    pub fn new() -> Result<Self> {
707        let p = dirs::cache_dir().ok_or(anyhow::anyhow!("No cache dir"))?
708            .join("sq-git.verification.cache");
709        Self::open(p)
710    }
711
712    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
713        let path = path.as_ref();
714
715        Ok(VerificationCache {
716            path: path.into(),
717            set: persistent_set::Set::read(&path, Self::CONTEXT)?,
718        })
719    }
720
721    fn key(&self, policy: &[u8], commit: Oid) -> Result<persistent_set::Value> {
722        let mut digest = HashAlgorithm::SHA512.context()?.for_digest();
723        digest.update(policy);
724        digest.update(commit.as_bytes());
725
726        let mut key = [0; 32];
727        digest.digest(&mut key)?;
728
729        Ok(key)
730    }
731
732    /// Returns whether (policy, commit id) is in the cache.
733    ///
734    /// If (policy, commit id) is in the cache, then it was previously
735    /// determined that the policy authenticated the commit.
736    pub fn contains(&mut self, policy: &[u8], commit: Oid) -> Result<bool> {
737        Ok(self.set.contains(&self.key(policy, commit)?)?)
738    }
739
740    /// Add (policy, commit id) to the cache.
741    ///
742    /// If (policy, commit id) is in the cache, this means that the
743    /// policy considers the commit to be authenticated.  Normally,
744    /// the policy comes from the parent commit, but it may be a
745    /// shadow policy.
746    pub fn insert(&mut self, policy: &[u8], commit: Oid) -> Result<()> {
747        self.set.insert(self.key(policy, commit)?);
748        Ok(())
749    }
750
751    pub fn persist(&mut self) -> Result<()> {
752        self.set.write(&self.path)?;
753        Ok(())
754    }
755}