sequoia_git/
verify.rs

1//! Commit-tree traversal and verification.
2
3use std::{
4    path::{Path, PathBuf},
5    collections::{
6        BTreeMap,
7        BTreeSet,
8    },
9};
10
11use anyhow::{anyhow, Context, Result};
12use git2::{
13    Repository,
14    Oid,
15};
16
17use sequoia_openpgp::{
18    self as openpgp,
19    Cert,
20    cert::{
21        amalgamation::ValidAmalgamation,
22        CertParser,
23    },
24    Fingerprint,
25    packet::{Signature, UserID},
26    parse::Parse,
27    policy::StandardPolicy,
28    types::{
29        HashAlgorithm,
30        RevocationStatus,
31        RevocationType,
32    },
33};
34
35use crate::{
36    Policy,
37    persistent_set,
38};
39
40/// Whether to trace execution by default (on stderr).
41const TRACE: bool = false;
42
43#[derive(Default, Debug)]
44pub struct VerificationResult {
45    pub signer_keys: BTreeSet<openpgp::Fingerprint>,
46    pub primary_uids: BTreeSet<UserID>,
47}
48
49pub fn verify(git: &Repository,
50              trust_root: Oid,
51              shadow_policy: Option<&[u8]>,
52              commit_range: (Oid, Oid),
53              results: &mut VerificationResult,
54              keep_going: bool,
55              mut verify_cb: impl FnMut(&Oid, Option<&Oid>, &crate::Result<Vec<crate::Result<(String, Signature, Cert, Fingerprint)>>>) -> crate::Result<()>,
56              cache: &mut VerificationCache)
57              -> Result<()> {
58    tracer!(TRACE, "verify");
59    t!("verify(_, {}, {}..{})", trust_root, commit_range.0, commit_range.1);
60
61    if shadow_policy.is_some() {
62        t!("Using a shadow policy to verify commits.");
63    } else {
64        t!("Using in-band policies to verify commits.");
65    }
66
67    // XXX: These should be passed in as arguments.
68    let p: &StandardPolicy = &StandardPolicy::new();
69    let now = std::time::SystemTime::now();
70
71    // STRATEGY
72    //
73    // We want to determine if we can authenticate the target commit
74    // starting from the trust root.  A simple approach is to try each
75    // possible path.  Unfortunately, this is exponential in the
76    // number of merges.  Consider a project where development is done
77    // on feature branches, and then merged using a merge commit.  The
78    // commit graph will look like:
79    //
80    // ```
81    //   o      <- Merge commit
82    //   | \
83    //   |  o   <- Feature branch
84    //   | /
85    //   o      <- Merge commit
86    //   | \
87    //   |  o   <- Feature branch
88    //   | /
89    //   o      <- Merge commit
90    //   | \
91    //   ...
92    // ```
93    //
94    // After 100 such merges, there are 2**100 different paths.
95    // That's intractable.
96    //
97    // Happily, we can do better.  We can determine if there is an
98    // authenticated path as a byproduct of a topological walk.  This
99    // approach limits both the work we have to do, and the space we
100    // require to O(N), where N is the number of commits preceding the
101    // target commit in the commit graph.  (If the trust root is a
102    // dominator, which is usually the case, then N is the number of
103    // commits that precede target, and follow the trust root, which
104    // is often very small.)
105    //
106    // Recall the following facts:
107    //
108    //   - A commit is authenticated if it is the trust root, or at
109    //     least one parent's policy authenticates the commit.
110    //     - Unless the signer's certificate is hard revoked.
111    //       - Unless there is an authenticated suffix that
112    //         immediately follows the commit, and a commit on that
113    //         authenticated suffix goodlists commit.
114    //
115    // In order to check the last condition, we need to know the
116    // goodlists of all the following, authenticated commits when we
117    // process the commit.  When we do a topological walk, we know
118    // that when we visit a commit, we've already visited all of its
119    // ancestors.  This means that we can easily collect the goodlists
120    // of all of the commits on authenticated paths from the commit to
121    // the target during the walk by propagating good lists from
122    // authenticated commits to their parents.  Then, when we are
123    // examining a commit, and we discover that it has been revoked,
124    // we can immediately check if it is on a relevant goodlist.
125    //
126    // To do the topological walk, we first have to do a bit of
127    // preparation.  Since we don't know a commit's children by
128    // looking at the commit (and not all children lead to the
129    // target), we have to extract that information from the graph.
130    // We do this by visiting all commits on paths from the target to
131    // the trust root, which we can do in O(N) space and time by doing
132    // a breath first search.  When we visit a commit, we increment
133    // the number of children each of its parents has.  We also use
134    // this opportunity to discover any certificates that have been
135    // hard revoked.
136    //
137    // Then we do the topological walk.  For each commit, we consider
138    // the number of unprocessed children to be the number of recorded
139    // children.  We then visit commits that have no unprocessed
140    // children.
141    //
142    // When we visit a commit, and there is an authenticated path from
143    // it to the target, we try to authenticate the commit.  That is,
144    // for each parent, we check if the parent can authenticate the
145    // commit.  If a parent authenticates the commit, but the signer's
146    // certificate has been revoked, we can immediately check whether
147    // the commit is on a goodlist.  For each parent that
148    // authenticated the commit, we merge the commit's *aggregate*
149    // goodlist into the parent's goodlist.  Finally, for each parent,
150    // we decrement the number of unprocessed children.  If there are
151    // no unprocessed children left, it is added to a pending queue,
152    // so that it can be processed.
153
154    // Return the policy associated with the commit.
155    let read_policy = |commit: &Oid| -> Result<Vec<u8>> {
156        if let Some(p) = &shadow_policy {
157            Ok(p.to_vec())
158        } else {
159            Ok(Policy::read_bytes_from_commit(git, commit)?)
160        }
161    };
162
163    // Whether we've seen the policy file before.  The key is the
164    // SHA512 digest of the policy file.
165    let mut policy_files: BTreeSet<Vec<u8>> = Default::default();
166    // Any hard revoked certificates.
167    let mut hard_revoked: BTreeSet<Fingerprint> = Default::default();
168
169    // Scans the specified commit's policy for hard revocations, and
170    // adds them to hard_revoked.
171    let mut scan_policy = |commit_id| -> Result<()> {
172        let policy_bytes = read_policy(&commit_id)?;
173        let policy_hash = sha512sum(&policy_bytes)?;
174        if policy_files.contains(&policy_hash) {
175            t!("Already scanned an identical copy of {}'s policy, skipping.",
176               commit_id);
177            return Ok(());
178        }
179        t!("Scanning {}'s policy for hard revocations", commit_id);
180
181        let policy = Policy::parse_bytes(&policy_bytes)?;
182        policy_files.insert(policy_hash);
183
184        // Scan for revoked certificates.
185        for authorization in policy.authorization().values() {
186            for cert in CertParser::from_bytes(&authorization.keyring)? {
187                let cert = if let Ok(cert) = cert {
188                    cert
189                } else {
190                    continue;
191                };
192
193                let vc = if let Ok(vc) = cert.with_policy(p, Some(now)) {
194                    vc
195                } else {
196                    continue;
197                };
198
199                let is_hard_revoked = |rs| {
200                    if let RevocationStatus::Revoked(revs) = rs {
201                        revs.iter().any(|rev| {
202                            if let Some((reason, _)) = rev.reason_for_revocation() {
203                                reason.revocation_type() == RevocationType::Hard
204                            } else {
205                                true
206                            }
207                        })
208                    } else {
209                        false
210                    }
211                };
212
213                // Check if the certificate is hard revoked.
214                if is_hard_revoked(vc.revocation_status()) {
215                    t!("Certificate {} is hard revoked, bad listing",
216                       cert.fingerprint());
217                    hard_revoked.insert(vc.fingerprint());
218                    for k in vc.keys().subkeys().for_signing() {
219                        hard_revoked.insert(k.key().fingerprint());
220                        t!("  Badlisting signing key {}",
221                           k.key().fingerprint());
222                    }
223
224                    continue;
225                }
226
227                // Check if any of the signing keys are hard revoked.
228                for k in vc.keys().subkeys().for_signing() {
229                    if is_hard_revoked(k.revocation_status()) {
230                        hard_revoked.insert(k.key().fingerprint());
231                        t!("  Signing key {} hard revoked, bad listing",
232                           k.key().fingerprint());
233                    }
234                }
235            }
236        }
237
238        Ok(())
239    };
240
241    let middle = if trust_root == commit_range.0 {
242        None
243    } else {
244        Some(commit_range.0)
245    };
246
247    struct Commit {
248        // Number of children (commits derived from this one).
249        children: usize,
250
251        // Whether there is an authenticated path from this node to
252        // the target.
253        authenticated_suffix: bool,
254
255        // Whether we still need to go via MIDDLE.
256        traversed_middle: bool,
257    }
258
259    impl Default for Commit {
260        fn default() -> Self {
261            Commit {
262                children: 0,
263                authenticated_suffix: false,
264                traversed_middle: false,
265            }
266        }
267    }
268
269    let mut commits: BTreeMap<Oid, Commit> = Default::default();
270    if trust_root != commit_range.1 {
271        commits.insert(
272            commit_range.1.clone(),
273            Commit {
274                children: 0,
275                authenticated_suffix: true,
276                traversed_middle: middle.is_none(),
277            });
278    }
279
280    // We walk the tree from the target to the trust root (using a
281    // breath first search, but it doesn't matter), and fill in
282    // COMMITS.CHILDREN.
283    {
284        // Commits that we haven't processed yet.
285        let mut pending: BTreeSet<Oid> = Default::default();
286        pending.insert(commit_range.1.clone());
287
288        // Commits that we've processed.
289        let mut processed: BTreeSet<Oid> = Default::default();
290
291        while let Some(commit_id) = pending.pop_first() {
292            processed.insert(commit_id);
293
294            let commit = git.find_commit(commit_id)?;
295
296            // Don't fail if we can't parse the policy file.
297            let _ = scan_policy(commit_id);
298
299            if commit_id == trust_root {
300                // This is the trust root.  There is no need to go
301                // further.
302                continue;
303            }
304
305            for parent in commit.parents() {
306                let parent_id = parent.id();
307
308                let info = commits.entry(parent_id).or_default();
309                info.children += 1;
310
311                if ! processed.contains(&parent_id)
312                    && ! pending.contains(&parent_id)
313                {
314                    pending.insert(parent_id);
315                }
316            }
317        }
318    }
319
320    // The union of the commit goodlists of commits on an
321    // authenticated suffix (not including this commit).  We build
322    // this up as we authenticate commits.  Since we do a topological
323    // walk, this will be complete for a given commit when we process
324    // that commit.
325    let mut descendant_goodlist: BTreeMap<Oid, BTreeSet<String>>
326        = Default::default();
327
328    let mut errors = Vec::new();
329    let mut unauthenticated_commits: BTreeSet<Oid> = Default::default();
330    let mut authenticated_commits: BTreeSet<Oid> = Default::default();
331
332    // Authenticate the commit using the specified parent.
333    //
334    // NOTE: This must only be called on a commit, if the commit is on
335    // an authenticated suffix!!!
336    let mut authenticate_commit = |commit_id, parent_id| -> Result<bool> {
337        let parent_policy = read_policy(&parent_id)?;
338        let parent_id = if commit_id == parent_id {
339            // This is only the case when verifying the trust root
340            // using the shadow policy.
341            assert_eq!(commit_id, trust_root);
342            assert!(shadow_policy.is_some());
343            None
344        } else {
345            Some(&parent_id)
346        };
347
348        // The current commit's good list.
349        let mut commit_goodlist = BTreeSet::new();
350
351        // XXX: If we have some certificates that are hard revoked,
352        // then we can't use the cache.  This is because the cache
353        // doesn't tell us what certificate was used to sign the
354        // commit, which means we can't figure out if the signer's
355        // certificate was revoked when the result is cached.
356        let (vresult, cache_hit) = if hard_revoked.is_empty()
357            && cache.contains(&parent_policy, commit_id)?
358        {
359            (Ok(vec![]), true)
360        } else {
361            let parent_policy = Policy::parse_bytes(&parent_policy)?;
362            let commit_policy = Policy::parse_bytes(read_policy(&commit_id)?)?;
363
364            commit_goodlist = commit_policy.commit_goodlist().clone();
365
366            (parent_policy.verify(git, &commit_id, &commit_policy,
367                                  &mut results.signer_keys,
368                                  &mut results.primary_uids),
369             false)
370        };
371
372        if let Err(err) = verify_cb(&commit_id, parent_id, &vresult) {
373            t!("verify_cb -> {}", err);
374            return Err(err.into());
375        }
376
377        if cache_hit {
378            // XXX: communicate this to the caller
379            // instead of eprintln.
380            let id = if let Some(parent_id) = parent_id {
381                format!("{}..{}", parent_id, commit_id)
382            } else {
383                commit_id.to_string()
384            };
385            eprintln!("{}:\n  Cached positive verification", id);
386        }
387
388        match vresult {
389            Ok(results) => {
390                // Whether the parent authenticated the commit.
391                let mut good = false;
392                // Whether the commit was goodlisted by a later
393                // commit.
394                let mut goodlisted = false;
395
396                // Whether the commit was goodlisted by the parent's
397                // policy.  (Because commits form a Merkle tree, this
398                // is only possible when we are using a shadow
399                // policy.)
400                if ! cache_hit && results.is_empty() {
401                    // XXX: communicate this to the caller
402                    // instead of eprintln.
403                    eprintln!("{}: Explicitly goodlisted", commit_id);
404                    good = true;
405                }
406
407                for r in results {
408                    match r {
409                        Ok((_, _sig, cert, signer_fpr)) => {
410                            // It looks good, but make sure the
411                            // certificate was not revoked.
412                            if hard_revoked.contains(&signer_fpr) {
413                                t!("Cert {}{} used to sign {} is revoked.",
414                                   cert.fingerprint(),
415                                   if cert.fingerprint() != signer_fpr {
416                                       format!(", key {}", signer_fpr)
417                                   } else {
418                                       "".to_string()
419                                   },
420                                   commit_id);
421
422                                // It was revoked, but perhaps the
423                                // commit was goodlisted.
424                                if descendant_goodlist.get(&commit_id)
425                                    .map(|goodlist| {
426                                        t!("  Goodlist contains: {}",
427                                           goodlist
428                                               .iter().cloned()
429                                               .collect::<Vec<String>>()
430                                               .join(", "));
431                                        goodlist.contains(&commit_id.to_string())
432                                    })
433                                    .unwrap_or(false)
434                                {
435                                    t!("But the commit was goodlisted, \
436                                        so all is good.");
437                                    goodlisted = true;
438                                }
439                            } else {
440                                t!("{} has a good signature from {}",
441                                   commit_id, cert.fingerprint());
442                                good = true;
443                            }
444                        }
445                        Err(e) => errors.push(
446                            anyhow::Error::from(e).context(
447                                format!("While verifying commit {}",
448                                        commit_id))),
449                    }
450                }
451
452                // We do NOT insert into the cache if the commit was
453                // goodlisted.  The cache is a function of the parent
454                // policy and the children policy; goodlisting is a
455                // function of commit range.
456                if ! cache_hit && good && ! goodlisted {
457                    cache.insert(&parent_policy, commit_id)?;
458                }
459
460                if cache_hit || good || goodlisted {
461                    // Merge the commit's goodlist into the parent's
462                    // goodlist.
463                    if let Some(descendant_goodlist)
464                        = descendant_goodlist.get(&commit_id)
465                    {
466                        commit_goodlist.extend(descendant_goodlist.iter().cloned());
467                    };
468
469                    if let Some(parent_id) = parent_id {
470                        if let Some(p_goodlist)
471                            = descendant_goodlist.get_mut(&parent_id)
472                        {
473                            p_goodlist.extend(commit_goodlist.into_iter());
474                        } else if ! commit_goodlist.is_empty() {
475                            descendant_goodlist.insert(
476                                parent_id.clone(), commit_goodlist);
477                        }
478                    }
479                }
480
481                let authenticated = cache_hit || good || goodlisted;
482                if authenticated {
483                    authenticated_commits.insert(commit_id);
484                } else {
485                    unauthenticated_commits.insert(commit_id);
486                }
487                Ok(authenticated)
488            },
489            Err(e) => {
490                unauthenticated_commits.insert(commit_id);
491                errors.push(anyhow::Error::from(e).context(
492                    format!("While verifying commit {}", commit_id)));
493                Ok(false)
494            },
495        }
496    };
497
498    // We now do a topological walk from the target to the trust root.
499
500    // Assume there is no path until we prove otherwise.  (A
501    // zero-length path is already valid.)
502    let mut valid_path = trust_root == commit_range.0
503        && commit_range.0 == commit_range.1;
504
505    // Commits that we haven't processed yet.
506    let mut pending: BTreeSet<Oid> = Default::default();
507    if trust_root != commit_range.1 {
508        pending.insert(commit_range.1.clone());
509    }
510
511    'authentication: while let Some(commit_id) = pending.pop_first() {
512        let commit = git.find_commit(commit_id)?;
513
514        t!("Processing {}: {}", commit_id, commit.summary().unwrap_or(""));
515
516        let commit_info = commits.get(&commit_id).expect("added");
517        assert_eq!(commit_info.children, 0);
518        let authenticated_suffix = commit_info.authenticated_suffix;
519        let traversed_middle = commit_info.traversed_middle;
520
521        for (parent_i, parent) in commit.parents().enumerate() {
522            let parent_id = parent.id();
523            t!("Considering {} -> {} (parent #{})",
524               commit_id, parent_id, parent_i);
525
526            let parent_info = commits.get_mut(&parent_id).expect("added");
527            t!("  Parent has {} unprocessed children", parent_info.children);
528            assert!(parent_info.children > 0);
529            parent_info.children -= 1;
530
531            if authenticated_suffix {
532                t!("  Child IS on an authenticated suffix");
533            } else {
534                t!("  Child IS NOT on an authenticated suffix.");
535            }
536
537            let authenticated = if keep_going || authenticated_suffix {
538                authenticate_commit(commit_id, parent_id)
539                    .with_context(|| {
540                        format!("Authenticating {} with {}",
541                                commit_id, parent_id)
542                    })?
543            } else {
544                false
545            };
546
547            if authenticated_suffix && authenticated {
548                t!("  Parent authenticates child");
549                parent_info.authenticated_suffix = true;
550
551                if traversed_middle {
552                    parent_info.traversed_middle = true;
553                } else if middle == Some(commit_id) {
554                    t!("  Traversed {} on way to trust root.", commit_id);
555                    parent_info.traversed_middle = true;
556                }
557
558                if parent_id == trust_root {
559                    t!("  Parent is the trust root.");
560
561                    // This is the trust root.  There is no need to go
562                    // further.
563                    if ! parent_info.traversed_middle {
564                        t!("  but path was not via {}", middle.unwrap());
565                    } else {
566                        valid_path = true;
567                    }
568                    if ! keep_going {
569                        break 'authentication;
570                    }
571                }
572            }
573
574            if parent_info.children == 0 {
575                t!("  No other unprocessed children.");
576                if parent_id == trust_root {
577                    t!("  Parent is the start of the commit range, \
578                        so it doesn't need to be processed.");
579                } else if ! keep_going && ! parent_info.authenticated_suffix {
580                    t!("  Parent does not authenticate any child, \
581                        so it doesn't need to be processed.");
582                } else {
583                    t!("  Adding parent to pending.");
584                    pending.insert(parent_id);
585                }
586            }
587        }
588    }
589
590    // When using a shadow policy, we also authenticate the trust
591    // root with it.
592    if (keep_going || valid_path) && shadow_policy.is_some() {
593        t!("Verifying trust root ({}) using the shadow policy",
594           trust_root);
595        // We verify the trust root using itself?  Not quite.  We know
596        // that authenticate_commit prefers the shadow policy, and we
597        // know that a shadow policy is set.  So this will actually
598        // check that the shadow policy verifies the trust root.
599        if ! authenticate_commit(trust_root, trust_root)? {
600            valid_path = false;
601
602            if let Some(e) = errors.pop() {
603                errors.push(
604                    e.context(format!("Could not verify trust root {} \
605                                       using the specified policy",
606                                      trust_root)));
607            }
608        }
609    }
610
611    if valid_path {
612        if trust_root == commit_range.0 {
613            eprintln!(
614                "Verified that there is an authenticated path from the trust root\n\
615                 {} to {}.",
616                trust_root, commit_range.1);
617        } else {
618            eprintln!(
619                "Verified that there is an authenticated path from the trust root\n\
620                 {} via {}\n\
621                 to {}.",
622                trust_root, commit_range.0, commit_range.1);
623        }
624        Ok(())
625    } else {
626        if errors.is_empty() {
627            Err(anyhow!("Could not verify commits {}..{}{}",
628                        trust_root,
629                        if let Some(middle) = middle {
630                            format!("{}..", middle)
631                        } else {
632                            "".to_string()
633                        },
634                        commit_range.1))
635        } else {
636            let mut e = errors.swap_remove(0)
637                .context(format!("Could not verify commits {}..{}{}",
638                                 commit_range.0,
639                                 if let Some(middle) = middle {
640                                     format!("{}..", middle)
641                                 } else {
642                                     "".to_string()
643                                 },
644                                 commit_range.1));
645            if ! errors.is_empty() {
646                e = e.context(
647                    format!("{} errors occurred while verifying the commits.  \
648                             {} commits couldn't be authenticated.  \
649                             Note: not all errors are fatal.  \
650                             The first error is shown:",
651                            errors.len() + 1,
652                            unauthenticated_commits.difference(
653                                &authenticated_commits).count()));
654            }
655            Err(e)
656        }
657    }
658}
659
660// Returns the SHA512 digest of the provided bytes.
661fn sha512sum(bytes: &[u8]) -> Result<Vec<u8>> {
662    let mut digest = HashAlgorithm::SHA512.context()?.for_digest();
663    digest.update(bytes);
664
665    let mut key = Vec::with_capacity(32);
666    digest.digest(&mut key)?;
667    Ok(key)
668}
669
670pub struct VerificationCache {
671    path: PathBuf,
672    set: persistent_set::Set,
673}
674
675impl VerificationCache {
676    const CONTEXT: &'static str = "SqGitVerify0";
677
678    pub fn new() -> Result<Self> {
679        let p = dirs::cache_dir().ok_or(anyhow::anyhow!("No cache dir"))?
680            .join("sq-git.verification.cache");
681        Self::open(p)
682    }
683
684    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
685        let path = path.as_ref();
686
687        Ok(VerificationCache {
688            path: path.into(),
689            set: persistent_set::Set::read(&path, Self::CONTEXT)?,
690        })
691    }
692
693    fn key(&self, policy: &[u8], commit: Oid) -> Result<persistent_set::Value> {
694        let mut digest = HashAlgorithm::SHA512.context()?.for_digest();
695        digest.update(policy);
696        digest.update(commit.as_bytes());
697
698        let mut key = [0; 32];
699        digest.digest(&mut key)?;
700
701        Ok(key)
702    }
703
704    /// Returns whether (policy, commit id) is in the cache.
705    ///
706    /// If (policy, commit id) is in the cache, then it was previously
707    /// determined that the policy authenticated the commit.
708    pub fn contains(&mut self, policy: &[u8], commit: Oid) -> Result<bool> {
709        Ok(self.set.contains(&self.key(policy, commit)?)?)
710    }
711
712    /// Add (policy, commit id) to the cache.
713    ///
714    /// If (policy, commit id) is in the cache, this means that the
715    /// policy considers the commit to be authenticated.  Normally,
716    /// the policy comes from the parent commit, but it may be a
717    /// shadow policy.
718    pub fn insert(&mut self, policy: &[u8], commit: Oid) -> Result<()> {
719        self.set.insert(self.key(policy, commit)?);
720        Ok(())
721    }
722
723    pub fn persist(&mut self) -> Result<()> {
724        self.set.write(&self.path)?;
725        Ok(())
726    }
727}