sequoia_git/
verify.rs

1//! Commit-tree traversal and verification.
2
3use std::{
4    path::{Path, PathBuf},
5    collections::{
6        BTreeMap,
7        BTreeSet,
8    },
9};
10
11use anyhow::{anyhow, Context, Result};
12use git2::{
13    Repository,
14    Oid,
15};
16
17use sequoia_openpgp::{
18    self as openpgp,
19    Cert,
20    cert::{
21        amalgamation::ValidAmalgamation,
22        CertParser,
23    },
24    crypto::hash::Digest,
25    Fingerprint,
26    packet::{Signature, UserID},
27    parse::Parse,
28    policy::StandardPolicy,
29    types::{
30        HashAlgorithm,
31        RevocationStatus,
32        RevocationType,
33    },
34};
35
36use crate::{
37    Policy,
38    persistent_set,
39};
40
41/// Whether to trace execution by default (on stderr).
42const TRACE: bool = false;
43
44#[derive(Default, Debug)]
45pub struct VerificationResult {
46    pub signer_keys: BTreeSet<openpgp::Fingerprint>,
47    pub primary_uids: BTreeSet<UserID>,
48}
49
50pub fn verify(git: &Repository,
51              trust_root: Oid,
52              shadow_policy: Option<&[u8]>,
53              commit_range: (Oid, Oid),
54              results: &mut VerificationResult,
55              keep_going: bool,
56              mut verify_cb: impl FnMut(&Oid, Option<&Oid>, &crate::Result<Vec<crate::Result<(String, Signature, Cert, Fingerprint)>>>) -> crate::Result<()>,
57              cache: &mut VerificationCache)
58              -> Result<()> {
59    tracer!(TRACE, "verify");
60    t!("verify(_, {}, {}..{})", trust_root, commit_range.0, commit_range.1);
61
62    if shadow_policy.is_some() {
63        t!("Using a shadow policy to verify commits.");
64    } else {
65        t!("Using in-band policies to verify commits.");
66    }
67
68    // XXX: These should be passed in as arguments.
69    let p: &StandardPolicy = &StandardPolicy::new();
70    let now = std::time::SystemTime::now();
71
72    // STRATEGY
73    //
74    // We want to determine if we can authenticate the target commit
75    // starting from the trust root.  A simple approach is to try each
76    // possible path.  Unfortunately, this is exponential in the
77    // number of merges.  Consider a project where development is done
78    // on feature branches, and then merged using a merge commit.  The
79    // commit graph will look like:
80    //
81    // ```
82    //   o      <- Merge commit
83    //   | \
84    //   |  o   <- Feature branch
85    //   | /
86    //   o      <- Merge commit
87    //   | \
88    //   |  o   <- Feature branch
89    //   | /
90    //   o      <- Merge commit
91    //   | \
92    //   ...
93    // ```
94    //
95    // After 100 such merges, there are 2**100 different paths.
96    // That's intractable.
97    //
98    // Happily, we can do better.  We can determine if there is an
99    // authenticated path as a byproduct of a topological walk.  This
100    // approach limits both the work we have to do, and the space we
101    // require to O(N), where N is the number of commits preceding the
102    // target commit in the commit graph.  (If the trust root is a
103    // dominator, which is usually the case, then N is the number of
104    // commits that precede target, and follow the trust root, which
105    // is often very small.)
106    //
107    // Recall the following facts:
108    //
109    //   - A commit is authenticated if it is the trust root, or at
110    //     least one parent's policy authenticates the commit.
111    //     - Unless the signer's certificate is hard revoked.
112    //       - Unless there is an authenticated suffix that
113    //         immediately follows the commit, and a commit on that
114    //         authenticated suffix goodlists commit.
115    //
116    // In order to check the last condition, we need to know the
117    // goodlists of all the following, authenticated commits when we
118    // process the commit.  When we do a topological walk, we know
119    // that when we visit a commit, we've already visited all of its
120    // ancestors.  This means that we can easily collect the goodlists
121    // of all of the commits on authenticated paths from the commit to
122    // the target during the walk by propagating good lists from
123    // authenticated commits to their parents.  Then, when we are
124    // examining a commit, and we discover that it has been revoked,
125    // we can immediately check if it is on a relevant goodlist.
126    //
127    // To do the topological walk, we first have to do a bit of
128    // preparation.  Since we don't know a commit's children by
129    // looking at the commit (and not all children lead to the
130    // target), we have to extract that information from the graph.
131    // We do this by visiting all commits on paths from the target to
132    // the trust root, which we can do in O(N) space and time by doing
133    // a breath first search.  When we visit a commit, we increment
134    // the number of children each of its parents has.  We also use
135    // this opportunity to discover any certificates that have been
136    // hard revoked.
137    //
138    // Then we do the topological walk.  For each commit, we consider
139    // the number of unprocessed children to be the number of recorded
140    // children.  We then visit commits that have no unprocessed
141    // children.
142    //
143    // When we visit a commit, and there is an authenticated path from
144    // it to the target, we try to authenticate the commit.  That is,
145    // for each parent, we check if the parent can authenticate the
146    // commit.  If a parent authenticates the commit, but the signer's
147    // certificate has been revoked, we can immediately check whether
148    // the commit is on a goodlist.  For each parent that
149    // authenticated the commit, we merge the commit's *aggregate*
150    // goodlist into the parent's goodlist.  Finally, for each parent,
151    // we decrement the number of unprocessed children.  If there are
152    // no unprocessed children left, it is added to a pending queue,
153    // so that it can be processed.
154
155    // Return the policy associated with the commit.
156    let read_policy = |commit: &Oid| -> Result<Vec<u8>> {
157        if let Some(p) = &shadow_policy {
158            Ok(p.to_vec())
159        } else {
160            Ok(Policy::read_bytes_from_commit(git, commit)?)
161        }
162    };
163
164    // Whether we've seen the policy file before.  The key is the
165    // SHA512 digest of the policy file.
166    let mut policy_files: BTreeSet<Vec<u8>> = Default::default();
167    // Any hard revoked certificates.
168    let mut hard_revoked: BTreeSet<Fingerprint> = Default::default();
169
170    // Scans the specified commit's policy for hard revocations, and
171    // adds them to hard_revoked.
172    let mut scan_policy = |commit_id| -> Result<()> {
173        let policy_bytes = read_policy(&commit_id)?;
174        let policy_hash = sha512sum(&policy_bytes)?;
175        if policy_files.contains(&policy_hash) {
176            t!("Already scanned an identical copy of {}'s policy, skipping.",
177               commit_id);
178            return Ok(());
179        }
180        t!("Scanning {}'s policy for hard revocations", commit_id);
181
182        let policy = Policy::parse_bytes(&policy_bytes)?;
183        policy_files.insert(policy_hash);
184
185        // Scan for revoked certificates.
186        for authorization in policy.authorization().values() {
187            for cert in CertParser::from_bytes(&authorization.keyring)? {
188                let cert = if let Ok(cert) = cert {
189                    cert
190                } else {
191                    continue;
192                };
193
194                let vc = if let Ok(vc) = cert.with_policy(p, Some(now)) {
195                    vc
196                } else {
197                    continue;
198                };
199
200                let is_hard_revoked = |rs| {
201                    if let RevocationStatus::Revoked(revs) = rs {
202                        revs.iter().any(|rev| {
203                            if let Some((reason, _)) = rev.reason_for_revocation() {
204                                reason.revocation_type() == RevocationType::Hard
205                            } else {
206                                true
207                            }
208                        })
209                    } else {
210                        false
211                    }
212                };
213
214                // Check if the certificate is hard revoked.
215                if is_hard_revoked(vc.revocation_status()) {
216                    t!("Certificate {} is hard revoked, bad listing",
217                       cert.fingerprint());
218                    hard_revoked.insert(vc.fingerprint());
219                    for k in vc.keys().subkeys().for_signing() {
220                        hard_revoked.insert(k.fingerprint());
221                        t!("  Badlisting signing key {}",
222                           k.fingerprint());
223                    }
224
225                    continue;
226                }
227
228                // Check if any of the signing keys are hard revoked.
229                for k in vc.keys().subkeys().for_signing() {
230                    if is_hard_revoked(k.revocation_status()) {
231                        hard_revoked.insert(k.fingerprint());
232                        t!("  Signing key {} hard revoked, bad listing",
233                           k.fingerprint());
234                    }
235                }
236            }
237        }
238
239        Ok(())
240    };
241
242    let middle = if trust_root == commit_range.0 {
243        None
244    } else {
245        Some(commit_range.0)
246    };
247
248    struct Commit {
249        // Number of children (commits derived from this one).
250        children: usize,
251
252        // Whether there is an authenticated path from this node to
253        // the target.
254        authenticated_suffix: bool,
255
256        // Whether we still need to go via MIDDLE.
257        traversed_middle: bool,
258    }
259
260    impl Default for Commit {
261        fn default() -> Self {
262            Commit {
263                children: 0,
264                authenticated_suffix: false,
265                traversed_middle: false,
266            }
267        }
268    }
269
270    let mut commits: BTreeMap<Oid, Commit> = Default::default();
271    if trust_root != commit_range.1 {
272        commits.insert(
273            commit_range.1.clone(),
274            Commit {
275                children: 0,
276                authenticated_suffix: true,
277                traversed_middle: middle.is_none(),
278            });
279    }
280
281    // We walk the tree from the target to the trust root (using a
282    // breath first search, but it doesn't matter), and fill in
283    // COMMITS.CHILDREN.
284    {
285        // Commits that we haven't processed yet.
286        let mut pending: BTreeSet<Oid> = Default::default();
287        pending.insert(commit_range.1.clone());
288
289        // Commits that we've processed.
290        let mut processed: BTreeSet<Oid> = Default::default();
291
292        while let Some(commit_id) = pending.pop_first() {
293            processed.insert(commit_id);
294
295            let commit = git.find_commit(commit_id)?;
296
297            // Don't fail if we can't parse the policy file.
298            let _ = scan_policy(commit_id);
299
300            if commit_id == trust_root {
301                // This is the trust root.  There is no need to go
302                // further.
303                continue;
304            }
305
306            for parent in commit.parents() {
307                let parent_id = parent.id();
308
309                let info = commits.entry(parent_id).or_default();
310                info.children += 1;
311
312                if ! processed.contains(&parent_id)
313                    && ! pending.contains(&parent_id)
314                {
315                    pending.insert(parent_id);
316                }
317            }
318        }
319    }
320
321    // The union of the commit goodlists of commits on an
322    // authenticated suffix (not including this commit).  We build
323    // this up as we authenticate commits.  Since we do a topological
324    // walk, this will be complete for a given commit when we process
325    // that commit.
326    let mut descendant_goodlist: BTreeMap<Oid, BTreeSet<String>>
327        = Default::default();
328
329    let mut errors = Vec::new();
330    let mut unauthenticated_commits: BTreeSet<Oid> = Default::default();
331    let mut authenticated_commits: BTreeSet<Oid> = Default::default();
332
333    // Authenticate the commit using the specified parent.
334    //
335    // NOTE: This must only be called on a commit, if the commit is on
336    // an authenticated suffix!!!
337    let mut authenticate_commit = |commit_id, parent_id| -> Result<bool> {
338        let parent_policy = read_policy(&parent_id)?;
339        let parent_id = if commit_id == parent_id {
340            // This is only the case when verifying the trust root
341            // using the shadow policy.
342            assert_eq!(commit_id, trust_root);
343            assert!(shadow_policy.is_some());
344            None
345        } else {
346            Some(&parent_id)
347        };
348
349        // The current commit's good list.
350        let mut commit_goodlist = BTreeSet::new();
351
352        // XXX: If we have some certificates that are hard revoked,
353        // then we can't use the cache.  This is because the cache
354        // doesn't tell us what certificate was used to sign the
355        // commit, which means we can't figure out if the signer's
356        // certificate was revoked when the result is cached.
357        let (vresult, cache_hit) = if hard_revoked.is_empty()
358            && cache.contains(&parent_policy, commit_id)?
359        {
360            (Ok(vec![]), true)
361        } else {
362            let parent_policy = Policy::parse_bytes(&parent_policy)?;
363            let commit_policy = Policy::parse_bytes(read_policy(&commit_id)?)?;
364
365            commit_goodlist = commit_policy.commit_goodlist().clone();
366
367            (parent_policy.verify(git, &commit_id, &commit_policy,
368                                  &mut results.signer_keys,
369                                  &mut results.primary_uids),
370             false)
371        };
372
373        if let Err(err) = verify_cb(&commit_id, parent_id, &vresult) {
374            t!("verify_cb -> {}", err);
375            return Err(err.into());
376        }
377
378        if cache_hit {
379            // XXX: communicate this to the caller
380            // instead of eprintln.
381            let id = if let Some(parent_id) = parent_id {
382                format!("{}..{}", parent_id, commit_id)
383            } else {
384                commit_id.to_string()
385            };
386            eprintln!("{}:\n  Cached positive verification", id);
387        }
388
389        match vresult {
390            Ok(results) => {
391                // Whether the parent authenticated the commit.
392                let mut good = false;
393                // Whether the commit was goodlisted by a later
394                // commit.
395                let mut goodlisted = false;
396
397                // Whether the commit was goodlisted by the parent's
398                // policy.  (Because commits form a Merkle tree, this
399                // is only possible when we are using a shadow
400                // policy.)
401                if ! cache_hit && results.is_empty() {
402                    // XXX: communicate this to the caller
403                    // instead of eprintln.
404                    eprintln!("{}: Explicitly goodlisted", commit_id);
405                    good = true;
406                }
407
408                for r in results {
409                    match r {
410                        Ok((_, _sig, cert, signer_fpr)) => {
411                            // It looks good, but make sure the
412                            // certificate was not revoked.
413                            if hard_revoked.contains(&signer_fpr) {
414                                t!("Cert {}{} used to sign {} is revoked.",
415                                   cert.fingerprint(),
416                                   if cert.fingerprint() != signer_fpr {
417                                       format!(", key {}", signer_fpr)
418                                   } else {
419                                       "".to_string()
420                                   },
421                                   commit_id);
422
423                                // It was revoked, but perhaps the
424                                // commit was goodlisted.
425                                if descendant_goodlist.get(&commit_id)
426                                    .map(|goodlist| {
427                                        t!("  Goodlist contains: {}",
428                                           goodlist
429                                               .iter().cloned()
430                                               .collect::<Vec<String>>()
431                                               .join(", "));
432                                        goodlist.contains(&commit_id.to_string())
433                                    })
434                                    .unwrap_or(false)
435                                {
436                                    t!("But the commit was goodlisted, \
437                                        so all is good.");
438                                    goodlisted = true;
439                                }
440                            } else {
441                                t!("{} has a good signature from {}",
442                                   commit_id, cert.fingerprint());
443                                good = true;
444                            }
445                        }
446                        Err(e) => errors.push(
447                            anyhow::Error::from(e).context(
448                                format!("While verifying commit {}",
449                                        commit_id))),
450                    }
451                }
452
453                // We do NOT insert into the cache if the commit was
454                // goodlisted.  The cache is a function of the parent
455                // policy and the children policy; goodlisting is a
456                // function of commit range.
457                if ! cache_hit && good && ! goodlisted {
458                    cache.insert(&parent_policy, commit_id)?;
459                }
460
461                if cache_hit || good || goodlisted {
462                    // Merge the commit's goodlist into the parent's
463                    // goodlist.
464                    if let Some(descendant_goodlist)
465                        = descendant_goodlist.get(&commit_id)
466                    {
467                        commit_goodlist.extend(descendant_goodlist.iter().cloned());
468                    };
469
470                    if let Some(parent_id) = parent_id {
471                        if let Some(p_goodlist)
472                            = descendant_goodlist.get_mut(&parent_id)
473                        {
474                            p_goodlist.extend(commit_goodlist.into_iter());
475                        } else if ! commit_goodlist.is_empty() {
476                            descendant_goodlist.insert(
477                                parent_id.clone(), commit_goodlist);
478                        }
479                    }
480                }
481
482                let authenticated = cache_hit || good || goodlisted;
483                if authenticated {
484                    authenticated_commits.insert(commit_id);
485                } else {
486                    unauthenticated_commits.insert(commit_id);
487                }
488                Ok(authenticated)
489            },
490            Err(e) => {
491                unauthenticated_commits.insert(commit_id);
492                errors.push(anyhow::Error::from(e).context(
493                    format!("While verifying commit {}", commit_id)));
494                Ok(false)
495            },
496        }
497    };
498
499    // We now do a topological walk from the target to the trust root.
500
501    // Assume there is no path until we prove otherwise.  (A
502    // zero-length path is already valid.)
503    let mut valid_path = trust_root == commit_range.0
504        && commit_range.0 == commit_range.1;
505
506    // Commits that we haven't processed yet.
507    let mut pending: BTreeSet<Oid> = Default::default();
508    if trust_root != commit_range.1 {
509        pending.insert(commit_range.1.clone());
510    }
511
512    'authentication: while let Some(commit_id) = pending.pop_first() {
513        let commit = git.find_commit(commit_id)?;
514
515        t!("Processing {}: {}", commit_id, commit.summary().unwrap_or(""));
516
517        let commit_info = commits.get(&commit_id).expect("added");
518        assert_eq!(commit_info.children, 0);
519        let authenticated_suffix = commit_info.authenticated_suffix;
520        let traversed_middle = commit_info.traversed_middle;
521
522        for (parent_i, parent) in commit.parents().enumerate() {
523            let parent_id = parent.id();
524            t!("Considering {} -> {} (parent #{})",
525               commit_id, parent_id, parent_i);
526
527            let parent_info = commits.get_mut(&parent_id).expect("added");
528            t!("  Parent has {} unprocessed children", parent_info.children);
529            assert!(parent_info.children > 0);
530            parent_info.children -= 1;
531
532            if authenticated_suffix {
533                t!("  Child IS on an authenticated suffix");
534            } else {
535                t!("  Child IS NOT on an authenticated suffix.");
536            }
537
538            let authenticated = if keep_going || authenticated_suffix {
539                authenticate_commit(commit_id, parent_id)
540                    .with_context(|| {
541                        format!("Authenticating {} with {}",
542                                commit_id, parent_id)
543                    })?
544            } else {
545                false
546            };
547
548            if authenticated_suffix && authenticated {
549                t!("  Parent authenticates child");
550                parent_info.authenticated_suffix = true;
551
552                if traversed_middle {
553                    parent_info.traversed_middle = true;
554                } else if middle == Some(commit_id) {
555                    t!("  Traversed {} on way to trust root.", commit_id);
556                    parent_info.traversed_middle = true;
557                }
558
559                if parent_id == trust_root {
560                    t!("  Parent is the trust root.");
561
562                    // This is the trust root.  There is no need to go
563                    // further.
564                    if ! parent_info.traversed_middle {
565                        t!("  but path was not via {}", middle.unwrap());
566                    } else {
567                        valid_path = true;
568                    }
569                    if ! keep_going {
570                        break 'authentication;
571                    }
572                }
573            }
574
575            if parent_info.children == 0 {
576                t!("  No other unprocessed children.");
577                if parent_id == trust_root {
578                    t!("  Parent is the start of the commit range, \
579                        so it doesn't need to be processed.");
580                } else if ! keep_going && ! parent_info.authenticated_suffix {
581                    t!("  Parent does not authenticate any child, \
582                        so it doesn't need to be processed.");
583                } else {
584                    t!("  Adding parent to pending.");
585                    pending.insert(parent_id);
586                }
587            }
588        }
589    }
590
591    // When using a shadow policy, we also authenticate the trust
592    // root with it.
593    if (keep_going || valid_path) && shadow_policy.is_some() {
594        t!("Verifying trust root ({}) using the shadow policy",
595           trust_root);
596        // We verify the trust root using itself?  Not quite.  We know
597        // that authenticate_commit prefers the shadow policy, and we
598        // know that a shadow policy is set.  So this will actually
599        // check that the shadow policy verifies the trust root.
600        if ! authenticate_commit(trust_root, trust_root)? {
601            valid_path = false;
602
603            if let Some(e) = errors.pop() {
604                errors.push(
605                    e.context(format!("Could not verify trust root {} \
606                                       using the specified policy",
607                                      trust_root)));
608            }
609        }
610    }
611
612    if valid_path {
613        if trust_root == commit_range.0 {
614            eprintln!(
615                "Verified that there is an authenticated path from the trust root\n\
616                 {} to {}.",
617                trust_root, commit_range.1);
618        } else {
619            eprintln!(
620                "Verified that there is an authenticated path from the trust root\n\
621                 {} via {}\n\
622                 to {}.",
623                trust_root, commit_range.0, commit_range.1);
624        }
625        Ok(())
626    } else {
627        if errors.is_empty() {
628            Err(anyhow!("Could not verify commits {}..{}{}",
629                        trust_root,
630                        if let Some(middle) = middle {
631                            format!("{}..", middle)
632                        } else {
633                            "".to_string()
634                        },
635                        commit_range.1))
636        } else {
637            let mut e = errors.swap_remove(0)
638                .context(format!("Could not verify commits {}..{}{}",
639                                 commit_range.0,
640                                 if let Some(middle) = middle {
641                                     format!("{}..", middle)
642                                 } else {
643                                     "".to_string()
644                                 },
645                                 commit_range.1));
646            if ! errors.is_empty() {
647                e = e.context(
648                    format!("{} errors occurred while verifying the commits.  \
649                             {} commits couldn't be authenticated.  \
650                             Note: not all errors are fatal.  \
651                             The first error is shown:",
652                            errors.len() + 1,
653                            unauthenticated_commits.difference(
654                                &authenticated_commits).count()));
655            }
656            Err(e)
657        }
658    }
659}
660
661// Returns the SHA512 digest of the provided bytes.
662fn sha512sum(bytes: &[u8]) -> Result<Vec<u8>> {
663    let mut digest = HashAlgorithm::SHA512.context()?;
664    digest.update(bytes);
665
666    let mut key = Vec::with_capacity(32);
667    digest.digest(&mut key)?;
668    Ok(key)
669}
670
671pub struct VerificationCache {
672    path: PathBuf,
673    set: persistent_set::Set,
674}
675
676impl VerificationCache {
677    const CONTEXT: &'static str = "SqGitVerify0";
678
679    pub fn new() -> Result<Self> {
680        let p = dirs::cache_dir().ok_or(anyhow::anyhow!("No cache dir"))?
681            .join("sq-git.verification.cache");
682        Self::open(p)
683    }
684
685    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
686        let path = path.as_ref();
687
688        Ok(VerificationCache {
689            path: path.into(),
690            set: persistent_set::Set::read(&path, Self::CONTEXT)?,
691        })
692    }
693
694    fn key(&self, policy: &[u8], commit: Oid) -> Result<persistent_set::Value> {
695        let mut digest = HashAlgorithm::SHA512.context()?;
696        digest.update(policy);
697        digest.update(commit.as_bytes());
698
699        let mut key = [0; 32];
700        digest.digest(&mut key)?;
701
702        Ok(key)
703    }
704
705    /// Returns whether (policy, commit id) is in the cache.
706    ///
707    /// If (policy, commit id) is in the cache, then it was previously
708    /// determined that the policy authenticated the commit.
709    pub fn contains(&mut self, policy: &[u8], commit: Oid) -> Result<bool> {
710        Ok(self.set.contains(&self.key(policy, commit)?)?)
711    }
712
713    /// Add (policy, commit id) to the cache.
714    ///
715    /// If (policy, commit id) is in the cache, this means that the
716    /// policy considers the commit to be authenticated.  Normally,
717    /// the policy comes from the parent commit, but it may be a
718    /// shadow policy.
719    pub fn insert(&mut self, policy: &[u8], commit: Oid) -> Result<()> {
720        self.set.insert(self.key(policy, commit)?);
721        Ok(())
722    }
723
724    pub fn persist(&mut self) -> Result<()> {
725        self.set.write(&self.path)?;
726        Ok(())
727    }
728}