Skip to main content

anvil_ssh/
cert_authority.rs

1// SPDX-License-Identifier: GPL-3.0-or-later
2// Rust guideline compliant 2026-03-30
3//! `@cert-authority` and `@revoked` markers in `known_hosts`-style files
4//! (PRD §5.8.3 / FR-60, FR-64).
5//!
6//! M14 ships the *parsing* surface plus the M14.2 revoked-key
7//! enforcement in [`crate::session::AnvilSession::check_server_key`].
8//! The actual cert-during-handshake verification (FR-61, FR-62, FR-63)
9//! is deferred until russh exposes the server's certificate to the
10//! `check_server_key` callback — russh 0.59's KEX negotiation does not
11//! advertise `*-cert-v01@openssh.com` as a host-key algorithm, so the
12//! callback only ever sees plain public keys. See the M14 plan for the
13//! upstream blocker.
14//!
15//! # File format
16//!
17//! Three line shapes are recognized:
18//!
19//! ```text
20//! # Direct fingerprint (Anvil convention, predates M14):
21//! github.com SHA256:uNiVztksCsDhcc0u9e8BujQXVUpKZIDTMczCvj3tD2s
22//!
23//! # Cert-authority CA pubkey (OpenSSH convention):
24//! @cert-authority *.example.com ssh-ed25519 AAAAC3NzaC1lZD... ca-key
25//!
26//! # Revoked specific key (Anvil shorthand: SHA256: form):
27//! @revoked example.com SHA256:abcd...
28//! ```
29//!
30//! Multiple comma-separated host patterns on one line are split into
31//! multiple entries.  Comment lines (`#`) and blanks are skipped.
32//!
33//! ## Hashed-host support (M19, FR-84)
34//!
35//! OpenSSH's `HashKnownHosts yes` setting replaces the plaintext host
36//! column with `|1|<base64-salt>|<base64-hmac-sha1>` so that an attacker
37//! who reads the file cannot enumerate which hosts the user has
38//! connected to.  Anvil parses these into [`HashedHost`] values and
39//! stores them on [`KnownHostsFile::hashed`]; the per-entry
40//! [`HashedHost::matches`] method runs HMAC-SHA1 against a candidate
41//! hostname to test for membership at lookup time.  HMAC-SHA1 here is
42//! a *privacy* primitive (file-readable enumeration resistance), not a
43//! *security* primitive — SHA-1 collisions don't matter because the
44//! salt is per-line and 160 bits, the input is a low-entropy hostname,
45//! and the threat model is exactly OpenSSH's: hide the hostname list
46//! from a casual file reader.
47
48use base64::{engine::general_purpose::STANDARD as BASE64, Engine as _};
49use hmac::{Hmac, Mac};
50use russh::keys::{ssh_key::PublicKey, HashAlg};
51use sha1::Sha1;
52
53use crate::error::AnvilError;
54
/// One `@cert-authority` entry: a CA public key plus one host pattern
/// it applies to.
///
/// Comma-separated patterns on the source line produce one
/// [`CertAuthority`] per pattern; every entry from the same line
/// carries its own copy of the same algorithm, fingerprint and key
/// text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CertAuthority {
    /// Raw glob pattern from the `known_hosts` line, e.g. `*.example.com`
    /// or `bastion`.  Compared with [`crate::ssh_config::lexer::wildcard_match`]
    /// at lookup time.
    pub host_pattern: String,
    /// Algorithm string as reported by the parsed public key
    /// ("ssh-ed25519", "ssh-rsa", "ecdsa-sha2-nistp256", …).
    pub algorithm: String,
    /// SHA-256 fingerprint of the CA pubkey, in OpenSSH format
    /// (`SHA256:base64...`).  Surfaces in `gitway config show --json`
    /// for audit and acts as the canonical identity of the CA.
    pub fingerprint: String,
    /// OpenSSH public key text exactly as it appeared on the source
    /// line after the host column (`algorithm AAAA... [comment]`), with
    /// surrounding whitespace trimmed.  It is stored verbatim — not
    /// re-serialised from the parsed key — so downstream
    /// cert-validation (deferred to russh upstream) can re-parse it
    /// without round-tripping through a wire-format blob.
    pub openssh: String,
}
78
/// One `@revoked` entry: a specific key fingerprint blocklisted for the
/// matching host pattern.
///
/// The Anvil shorthand uses the `SHA256:...` fingerprint form rather
/// than the full OpenSSH pubkey blob — this matches the rest of the
/// `known_hosts` file's existing convention.  OpenSSH's full
/// pubkey-blob form (`@revoked host algorithm AAAA...`) is documented
/// as a follow-up if users ask.
///
/// Comma-separated patterns on the source line produce one
/// [`RevokedEntry`] per pattern, all with the same fingerprint.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RevokedEntry {
    /// Raw glob pattern.  `*` to revoke unconditionally.
    pub host_pattern: String,
    /// Fingerprint string, e.g. `SHA256:uNiVztksCs...`.  Compared
    /// case-sensitively against the presented key's fingerprint.
    /// The parser stores this text as written; it does not validate
    /// that it is a well-formed fingerprint.
    pub fingerprint: String,
}
95
/// One direct host-fingerprint pin (`host SHA256:fp`).  Predates M14;
/// kept here so [`parse_known_hosts`] can return everything in one
/// pass instead of forcing the caller to re-iterate the file.
///
/// Comma-separated hosts on the source line produce one
/// [`DirectHostKey`] per host, all with the same fingerprint.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DirectHostKey {
    /// One plaintext token from the line's comma-separated host column,
    /// whitespace-trimmed.  Tokens starting with `|1|` never land here;
    /// the parser routes those to the hashed entries instead.
    pub host_pattern: String,
    /// Everything on the source line after the host column, trimmed of
    /// surrounding whitespace — normally `SHA256:...`.  Stored as
    /// written; the parser does not validate its shape.
    pub fingerprint: String,
}
104
/// One `HashKnownHosts yes` entry (M19, FR-84).
///
/// OpenSSH replaces the plaintext host column with `|1|salt|hash` so a
/// casual reader of the `known_hosts` file cannot enumerate connected
/// hosts.  The salt is 20 random bytes (matching HMAC-SHA1's output
/// width); the hash is `HMAC-SHA1(key=salt, data=hostname)`.  Use
/// [`HashedHost::matches`] to test a candidate hostname against the
/// stored hash.
///
/// Multiple comma-separated `|1|...` tokens on one source line produce
/// one [`HashedHost`] per token, all sharing the same `fingerprint`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HashedHost {
    /// Salt decoded from the first base64 field of the `|1|salt|hash`
    /// token; used as the HMAC-SHA1 key.  Always 20 bytes.
    pub salt: [u8; 20],
    /// `HMAC-SHA1(salt, hostname)` for the hostname this entry covers,
    /// decoded from the second base64 field of the token.
    /// Always 20 bytes.
    pub hash: [u8; 20],
    /// Fingerprint of the host key, e.g. `SHA256:uNiVztksCs...`.
    /// Shared across every [`HashedHost`] derived from the same source
    /// line.
    pub fingerprint: String,
}
128
129impl HashedHost {
130    /// Returns `true` iff `host` is the hostname this entry encodes.
131    ///
132    /// Runs `HMAC-SHA1(self.salt, host.as_bytes())` and compares against
133    /// the stored hash with constant-time equality (via `HMAC::verify`,
134    /// which uses [`subtle`] internally).  False on mismatch — never
135    /// errors.
136    #[must_use]
137    pub fn matches(&self, host: &str) -> bool {
138        let Ok(mut mac) = <Hmac<Sha1>>::new_from_slice(&self.salt) else {
139            // `new_from_slice` only fails on key-length restrictions
140            // that HMAC-SHA1 does not enforce, so this branch is
141            // effectively dead.  Defensive return rather than panic.
142            return false;
143        };
144        mac.update(host.as_bytes());
145        mac.verify_slice(&self.hash).is_ok()
146    }
147}
148
/// Fully-parsed view of one `known_hosts`-style file.
///
/// Returned by [`parse_known_hosts`].  Empty vectors are the natural
/// state when a file contains no entries of that class; the `Default`
/// value has all four vectors empty.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct KnownHostsFile {
    /// Plaintext direct pins (`host SHA256:fp`), one per host token,
    /// in source order.
    pub direct: Vec<DirectHostKey>,
    /// `@cert-authority` entries, one per host pattern, in source
    /// order.
    pub cert_authorities: Vec<CertAuthority>,
    /// `@revoked` entries, one per host pattern, in source order.
    pub revoked: Vec<RevokedEntry>,
    /// Hashed direct entries (M19, FR-84).  Use
    /// [`HashedHost::matches`] to query by candidate hostname; the
    /// vector itself preserves source order.
    pub hashed: Vec<HashedHost>,
}
163
164/// Parses `content` (the contents of a `known_hosts`-style file) into
165/// the three classes of entries Anvil understands.
166///
167/// Errors only on hard malformation — a `@cert-authority` line whose
168/// pubkey string cannot be parsed as OpenSSH format.  Direct-fingerprint
169/// lines that do not split into `host fingerprint` are silently skipped
170/// (matches the pre-M14 lenient parser).
171///
172/// # Errors
173/// [`AnvilError::invalid_config`] when a `@cert-authority` pubkey
174/// string fails to parse as OpenSSH (e.g. unknown algorithm, malformed
175/// base64).
176pub fn parse_known_hosts(content: &str) -> Result<KnownHostsFile, AnvilError> {
177    let mut out = KnownHostsFile::default();
178
179    for (idx, raw) in content.lines().enumerate() {
180        let line = raw.trim();
181        if line.is_empty() || line.starts_with('#') {
182            continue;
183        }
184        let line_no = idx + 1;
185
186        if let Some(rest) = strip_marker_ci(line, "@cert-authority") {
187            parse_cert_authority_line(rest, line_no, &mut out)?;
188            continue;
189        }
190        if let Some(rest) = strip_marker_ci(line, "@revoked") {
191            parse_revoked_line(rest, line_no, &mut out);
192            continue;
193        }
194
195        // Direct line (plaintext or hashed): `host[,host2,…] SHA256:fp`.
196        // Each comma-separated host token is classified independently
197        // — `|1|salt|hash` tokens land in `hashed`, others in `direct`.
198        let mut parts = line.splitn(2, char::is_whitespace);
199        let Some(host_part) = parts.next() else {
200            continue;
201        };
202        let Some(fp_part) = parts.next() else {
203            continue;
204        };
205        let fp = fp_part.trim();
206        if fp.is_empty() {
207            continue;
208        }
209        for host_token in split_host_patterns(host_part) {
210            if host_token.starts_with("|1|") {
211                match parse_hashed_token(&host_token) {
212                    Some((salt, hash)) => {
213                        out.hashed.push(HashedHost {
214                            salt,
215                            hash,
216                            fingerprint: fp.to_owned(),
217                        });
218                    }
219                    None => {
220                        log::warn!(
221                            "known_hosts: line {line_no}: malformed hashed token '{host_token}'; \
222                             skipping (expected '|1|<base64-salt>|<base64-hash>')",
223                        );
224                    }
225                }
226            } else {
227                out.direct.push(DirectHostKey {
228                    host_pattern: host_token,
229                    fingerprint: fp.to_owned(),
230                });
231            }
232        }
233    }
234
235    Ok(out)
236}
237
238/// Decodes a single `|1|<base64-salt>|<base64-hash>` token into its
239/// 20-byte salt + 20-byte hash components.
240///
241/// Returns `None` for any deviation from the expected form: missing
242/// `|1|` prefix, missing inner `|` separator, base64 decode failure,
243/// or wrong byte length.  Callers log + skip on `None`.
244fn parse_hashed_token(token: &str) -> Option<([u8; 20], [u8; 20])> {
245    let rest = token.strip_prefix("|1|")?;
246    let (salt_b64, hash_b64) = rest.split_once('|')?;
247    let salt_bytes = BASE64.decode(salt_b64.as_bytes()).ok()?;
248    let hash_bytes = BASE64.decode(hash_b64.as_bytes()).ok()?;
249    let salt: [u8; 20] = salt_bytes.try_into().ok()?;
250    let hash: [u8; 20] = hash_bytes.try_into().ok()?;
251    Some((salt, hash))
252}
253
/// Strips a leading `marker` token from `line` and returns what
/// follows it, with the separating whitespace removed.
///
/// The marker is compared ASCII-case-insensitively (matching OpenSSH)
/// and must be followed by at least one whitespace character and then
/// some non-whitespace content.  Returns `None` when the line is too
/// short, does not start with the marker, continues the marker without
/// a separator (`@cert-authorityFOO ...`), or has nothing after it.
fn strip_marker_ci<'a>(line: &'a str, marker: &str) -> Option<&'a str> {
    let cut = marker.len();
    // Checked slicing covers both "line shorter than marker" and "cut
    // falls inside a multi-byte character".
    let head = line.get(..cut)?;
    let tail = line.get(cut..)?;
    if !head.eq_ignore_ascii_case(marker) {
        return None;
    }
    // Require one whitespace separator, then swallow any further
    // leading whitespace.
    let body = tail.strip_prefix(char::is_whitespace)?.trim_start();
    (!body.is_empty()).then_some(body)
}
273
274/// Parses the body of a `@cert-authority` line (everything after the
275/// marker token + whitespace).  Format: `host_pattern[s] algorithm
276/// AAAA... [comment]`.
277fn parse_cert_authority_line(
278    rest: &str,
279    line_no: usize,
280    out: &mut KnownHostsFile,
281) -> Result<(), AnvilError> {
282    let mut parts = rest.splitn(2, char::is_whitespace);
283    let Some(host_part) = parts.next() else {
284        return Err(AnvilError::invalid_config(format!(
285            "known_hosts:{line_no}: @cert-authority line missing host pattern",
286        )));
287    };
288    let Some(key_part) = parts.next() else {
289        return Err(AnvilError::invalid_config(format!(
290            "known_hosts:{line_no}: @cert-authority line missing pubkey",
291        )));
292    };
293
294    let key_part = key_part.trim();
295    let pk = PublicKey::from_openssh(key_part).map_err(|e| {
296        AnvilError::invalid_config(format!(
297            "known_hosts:{line_no}: failed to parse @cert-authority pubkey: {e}",
298        ))
299    })?;
300    let algorithm = pk.algorithm().as_str().to_owned();
301    let fingerprint = pk.fingerprint(HashAlg::Sha256).to_string();
302
303    for host in split_host_patterns(host_part) {
304        out.cert_authorities.push(CertAuthority {
305            host_pattern: host,
306            algorithm: algorithm.clone(),
307            fingerprint: fingerprint.clone(),
308            openssh: key_part.to_owned(),
309        });
310    }
311    Ok(())
312}
313
314/// Parses the body of a `@revoked` line.  Format:
315/// `host_pattern[s] SHA256:fingerprint`.
316fn parse_revoked_line(rest: &str, line_no: usize, out: &mut KnownHostsFile) {
317    let mut parts = rest.splitn(2, char::is_whitespace);
318    let Some(host_part) = parts.next() else {
319        log::warn!("known_hosts:{line_no}: @revoked line missing host pattern");
320        return;
321    };
322    let Some(fp_part) = parts.next() else {
323        log::warn!("known_hosts:{line_no}: @revoked line missing fingerprint");
324        return;
325    };
326    let fp = fp_part.trim();
327    if fp.is_empty() {
328        log::warn!("known_hosts:{line_no}: @revoked line has empty fingerprint");
329        return;
330    }
331    for host in split_host_patterns(host_part) {
332        out.revoked.push(RevokedEntry {
333            host_pattern: host,
334            fingerprint: fp.to_owned(),
335        });
336    }
337}
338
/// Breaks a comma-separated host column into its individual patterns.
///
/// Each piece is trimmed of surrounding whitespace; pieces that are
/// empty after trimming (as produced by `a,,b` or a trailing comma)
/// are dropped.  The result preserves source order and may be empty.
fn split_host_patterns(column: &str) -> Vec<String> {
    let mut patterns = Vec::new();
    for piece in column.split(',') {
        let piece = piece.trim();
        if !piece.is_empty() {
            patterns.push(piece.to_owned());
        }
    }
    patterns
}
349
/// Unit tests for the `known_hosts` parser: one test per line shape
/// (direct, `@cert-authority`, `@revoked`, hashed) plus the lenient
/// skip paths.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn empty_input_yields_default() {
        let parsed = parse_known_hosts("").expect("empty");
        assert_eq!(parsed, KnownHostsFile::default());
    }

    #[test]
    fn comments_and_blanks_skipped() {
        let parsed = parse_known_hosts(
            "# top comment\n\
             \n\
             # another\n",
        )
        .expect("parse");
        assert_eq!(parsed, KnownHostsFile::default());
    }

    #[test]
    fn direct_fingerprint_line() {
        let parsed =
            parse_known_hosts("github.com SHA256:uNiVztksCsDhcc0u9e8BujQXVUpKZIDTMczCvj3tD2s\n")
                .expect("parse");
        assert_eq!(parsed.direct.len(), 1);
        assert_eq!(parsed.direct[0].host_pattern, "github.com");
        assert_eq!(
            parsed.direct[0].fingerprint,
            "SHA256:uNiVztksCsDhcc0u9e8BujQXVUpKZIDTMczCvj3tD2s",
        );
        assert!(parsed.cert_authorities.is_empty());
        assert!(parsed.revoked.is_empty());
    }

    #[test]
    fn comma_separated_hosts_split_into_multiple_entries() {
        let parsed =
            parse_known_hosts("github.com,gitlab.com,codeberg.org SHA256:abcd\n").expect("parse");
        assert_eq!(parsed.direct.len(), 3);
        let hosts: Vec<&str> = parsed
            .direct
            .iter()
            .map(|d| d.host_pattern.as_str())
            .collect();
        assert_eq!(hosts, vec!["github.com", "gitlab.com", "codeberg.org"]);
    }

    #[test]
    fn cert_authority_line_parsed() {
        // Real ed25519 pubkey blob (32-byte point base64-encoded with the
        // "ssh-ed25519" header).  Doubles as a roundtrip check that
        // ssh_key::PublicKey accepts the input we emit.
        let parsed = parse_known_hosts(
            "@cert-authority *.example.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILM+rvN+ot98qgEN796jTiQfZfG1KaT0PtFDJ/XFSqti ca-key\n",
        )
        .expect("parse");
        assert_eq!(parsed.cert_authorities.len(), 1);
        let ca = &parsed.cert_authorities[0];
        assert_eq!(ca.host_pattern, "*.example.com");
        assert_eq!(ca.algorithm, "ssh-ed25519");
        assert!(
            ca.fingerprint.starts_with("SHA256:"),
            "expected SHA256 fp, got: {}",
            ca.fingerprint,
        );
        assert!(parsed.direct.is_empty());
        assert!(parsed.revoked.is_empty());
    }

    #[test]
    fn cert_authority_marker_case_insensitive() {
        let parsed = parse_known_hosts(
            "@CERT-AUTHORITY *.example.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILM+rvN+ot98qgEN796jTiQfZfG1KaT0PtFDJ/XFSqti\n",
        )
        .expect("parse");
        assert_eq!(parsed.cert_authorities.len(), 1);
    }

    #[test]
    fn cert_authority_invalid_pubkey_errors() {
        let err = parse_known_hosts("@cert-authority *.example.com ssh-ed25519 not-base64-data\n")
            .expect_err("malformed pubkey");
        let msg = format!("{err}");
        assert!(
            msg.contains("@cert-authority"),
            "expected error to mention @cert-authority, got: {msg}",
        );
    }

    #[test]
    fn revoked_line_parsed() {
        let parsed =
            parse_known_hosts("@revoked example.com SHA256:abcdefghijklmnop\n").expect("parse");
        assert_eq!(parsed.revoked.len(), 1);
        assert_eq!(parsed.revoked[0].host_pattern, "example.com");
        assert_eq!(parsed.revoked[0].fingerprint, "SHA256:abcdefghijklmnop");
        assert!(parsed.direct.is_empty());
        assert!(parsed.cert_authorities.is_empty());
    }

    #[test]
    fn revoked_marker_case_insensitive() {
        let parsed = parse_known_hosts("@REVOKED * SHA256:a\n").expect("parse");
        assert_eq!(parsed.revoked.len(), 1);
        assert_eq!(parsed.revoked[0].host_pattern, "*");
    }

    #[test]
    fn revoked_with_comma_hosts() {
        let parsed =
            parse_known_hosts("@revoked a.example.com,b.example.com SHA256:abc\n").expect("parse");
        assert_eq!(parsed.revoked.len(), 2);
        assert_eq!(parsed.revoked[0].host_pattern, "a.example.com");
        assert_eq!(parsed.revoked[1].host_pattern, "b.example.com");
    }

    #[test]
    fn revoked_missing_fingerprint_logged_and_skipped() {
        // Truncated `@revoked example.com` (no fingerprint) — soft-skip
        // with a warn rather than error: matches the leniency of the
        // existing direct-fingerprint parser.
        let parsed = parse_known_hosts("@revoked example.com\n").expect("parse");
        assert!(parsed.revoked.is_empty());
    }

    #[test]
    fn malformed_hashed_entry_skipped() {
        // The `|1|` prefix routes the token to the hashed-host decoder,
        // but neither base64 field can decode to the required 20 bytes,
        // so the token is warned about and dropped.  It must not fall
        // back to being a plaintext pin either.
        let parsed = parse_known_hosts(
            "|1|abcdef==|fedcba== ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILM+rvN+ot98qgEN796jTiQfZfG1KaT0PtFDJ/XFSqti\n",
        )
        .expect("parse");
        assert!(parsed.hashed.is_empty());
        assert!(parsed.direct.is_empty());
        assert!(parsed.cert_authorities.is_empty());
    }

    #[test]
    fn hashed_entry_parsed_and_matches_hostname() {
        // Build a well-formed token the way `HashKnownHosts yes` does:
        // hash = HMAC-SHA1(key = salt, data = hostname).
        let salt = [0x42_u8; 20];
        let mut mac =
            <Hmac<Sha1>>::new_from_slice(&salt).expect("HMAC accepts any key length");
        mac.update(b"github.com");
        let hash = mac.finalize().into_bytes();
        let line = format!(
            "|1|{}|{} SHA256:fp\n",
            BASE64.encode(salt),
            BASE64.encode(hash.as_slice()),
        );

        let parsed = parse_known_hosts(&line).expect("parse");
        assert!(parsed.direct.is_empty());
        assert_eq!(parsed.hashed.len(), 1);
        let entry = &parsed.hashed[0];
        assert_eq!(entry.salt, salt);
        assert_eq!(entry.fingerprint, "SHA256:fp");
        assert!(entry.matches("github.com"));
        assert!(!entry.matches("gitlab.com"));
    }

    #[test]
    fn mixed_file_three_classes() {
        let parsed = parse_known_hosts(
            "# header\n\
             github.com SHA256:fp1\n\
             @cert-authority *.example.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILM+rvN+ot98qgEN796jTiQfZfG1KaT0PtFDJ/XFSqti ca\n\
             @revoked github.com SHA256:bad-fp\n\
             gitlab.com SHA256:fp2\n",
        )
        .expect("parse");
        assert_eq!(parsed.direct.len(), 2);
        assert_eq!(parsed.cert_authorities.len(), 1);
        assert_eq!(parsed.revoked.len(), 1);
        assert_eq!(parsed.direct[0].host_pattern, "github.com");
        assert_eq!(parsed.direct[1].host_pattern, "gitlab.com");
        assert_eq!(parsed.cert_authorities[0].host_pattern, "*.example.com");
        assert_eq!(parsed.revoked[0].host_pattern, "github.com");
    }

    #[test]
    fn marker_without_trailing_space_not_treated_as_marker() {
        // `@cert-authoritySomething` should NOT match the marker — the
        // marker requires whitespace after.  Such a line is treated as
        // a malformed direct line and silently skipped.
        let parsed = parse_known_hosts("@cert-authoritynot-a-marker\n").expect("parse");
        assert_eq!(parsed, KnownHostsFile::default());
    }

    #[test]
    fn whitespace_around_fields_tolerated() {
        let parsed = parse_known_hosts("  github.com\tSHA256:fp\n").expect("parse");
        assert_eq!(parsed.direct.len(), 1);
        assert_eq!(parsed.direct[0].host_pattern, "github.com");
        assert_eq!(parsed.direct[0].fingerprint, "SHA256:fp");
    }
}