Skip to main content

anvil_ssh/
hostkey.rs

1// SPDX-License-Identifier: GPL-3.0-or-later
2// Rust guideline compliant 2026-03-30
3//! SSH host-key fingerprint pinning for well-known Git hosting services (FR-6, FR-7).
4//!
5//! Gitway embeds the published SHA-256 fingerprints for GitHub, GitLab, and
6//! Codeberg.  On every connection the server's presented key is hashed and the
7//! resulting fingerprint is compared against the embedded list for that host.
8//! Any mismatch aborts the connection immediately.
9//!
10//! # Custom / self-hosted instances
11//!
12//! Fingerprints for any host not listed below can be added via a
13//! `known_hosts`-style file at `~/.config/gitway/known_hosts` (FR-7).
14//! Each non-comment line must follow the format:
15//!
16//! ```text
17//! hostname SHA256:<base64-encoded-fingerprint>
18//! ```
19//!
20//! # Fingerprint sources
21//!
22//! - GitHub:   <https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/githubs-ssh-key-fingerprints>
23//! - GitLab:   <https://docs.gitlab.com/ee/user/gitlab_com/index.html#ssh-host-keys-fingerprints>
24//! - Codeberg: <https://docs.codeberg.org/security/ssh-fingerprint/>
25//!
26//! Last verified: 2026-04-11
27
28use std::path::Path;
29
30use base64::{engine::general_purpose::STANDARD as BASE64, Engine as _};
31use hmac::{Hmac, Mac};
32use rand_core::{OsRng, RngCore};
33use sha1::Sha1;
34
35use crate::cert_authority::{parse_known_hosts, CertAuthority, KnownHostsFile, RevokedEntry};
36use crate::error::AnvilError;
37use crate::ssh_config::lexer::wildcard_match;
38
39// ── Well-known host constants ─────────────────────────────────────────────────
40
/// Primary GitHub SSH host (FR-1).
pub const DEFAULT_GITHUB_HOST: &str = "github.com";

/// Fallback GitHub SSH host when port 22 is unavailable (FR-1).
///
/// GitHub routes SSH traffic through HTTPS port 443 on this hostname.
pub const GITHUB_FALLBACK_HOST: &str = "ssh.github.com";

/// Primary GitLab SSH host.
pub const DEFAULT_GITLAB_HOST: &str = "gitlab.com";

/// Fallback GitLab SSH host when port 22 is unavailable.
///
/// GitLab routes SSH traffic through HTTPS port 443 on this hostname.
pub const GITLAB_FALLBACK_HOST: &str = "altssh.gitlab.com";

/// Primary Codeberg SSH host.
pub const DEFAULT_CODEBERG_HOST: &str = "codeberg.org";

/// Default SSH port used by all providers.
///
/// Only override this when connecting to a self-hosted instance that
/// listens on a non-standard port.  (Outbound connections to low-numbered
/// ports need no special privileges; that restriction applies to binding a
/// listener, not to a client.)
pub const DEFAULT_PORT: u16 = 22;

/// HTTPS-port fallback for providers that support it (GitHub, GitLab).
pub const FALLBACK_PORT: u16 = 443;

// ── Legacy alias kept for backward compatibility ──────────────────────────────

/// Alias for [`GITHUB_FALLBACK_HOST`]; retained so existing callers that
/// reference the old name continue to compile.
#[deprecated(since = "0.2.0", note = "use GITHUB_FALLBACK_HOST instead")]
pub const FALLBACK_HOST: &str = GITHUB_FALLBACK_HOST;
76
77// ── Embedded fingerprints ─────────────────────────────────────────────────────
78
/// GitHub's published SSH host-key fingerprints (SHA-256, FR-6).
///
/// Contains one entry per key type in `SHA256:<base64>` format:
/// - Ed25519  (index 0)
/// - ECDSA    (index 1)
/// - RSA      (index 2)
///
/// The same set is used for both `github.com` and `ssh.github.com`.
/// Keep the index order intact: [`all_embedded`] derives each entry's
/// algorithm label from its position in this slice.
///
/// **If GitHub rotates its keys, update this constant and cut a patch release.**
pub const GITHUB_FINGERPRINTS: &[&str] = &[
    "SHA256:+DiY3wvvV6TuJJhbpZisF/zLDA0zPMSvHdkr4UvCOqU", // Ed25519
    "SHA256:p2QAMXNIC1TJYWeIOttrVc98/R1BUFWu3/LiyKgUfQM", // ECDSA-SHA2-nistp256
    "SHA256:uNiVztksCsDhcc0u9e8BujQXVUpKZIDTMczCvj3tD2s", // RSA
];

/// GitLab.com's published SSH host-key fingerprints (SHA-256).
///
/// Contains one entry per key type in `SHA256:<base64>` format:
/// - Ed25519  (index 0)
/// - ECDSA    (index 1)
/// - RSA      (index 2)
///
/// The same set is used for both `gitlab.com` and `altssh.gitlab.com`.
/// Keep the index order intact: [`all_embedded`] derives each entry's
/// algorithm label from its position in this slice.
///
/// **If GitLab rotates its keys, update this constant and cut a patch release.**
pub const GITLAB_FINGERPRINTS: &[&str] = &[
    "SHA256:eUXGGm1YGsMAS7vkcx6JOJdOGHPem5gQp4taiCfCLB8", // Ed25519
    "SHA256:HbW3g8zUjNSksFbqTiUWPWg2Bq1x8xdGUrliXFzSnUw", // ECDSA-SHA2-nistp256
    "SHA256:ROQFvPThGrW4RuWLoL9tq9I9zJ42fK4XywyRtbOz/EQ", // RSA
];

/// Codeberg.org's published SSH host-key fingerprints (SHA-256).
///
/// Contains one entry per key type in `SHA256:<base64>` format:
/// - Ed25519  (index 0)
/// - ECDSA    (index 1)
/// - RSA      (index 2)
///
/// Keep the index order intact: [`all_embedded`] derives each entry's
/// algorithm label from its position in this slice.
///
/// **If Codeberg rotates its keys, update this constant and cut a patch release.**
pub const CODEBERG_FINGERPRINTS: &[&str] = &[
    "SHA256:mIlxA9k46MmM6qdJOdMnAQpzGxF4WIVVL+fj+wZbw0g", // Ed25519
    "SHA256:T9FYDEHELhVkulEKKwge5aVhVTbqCW0MIRwAfpARs/E", // ECDSA-SHA2-nistp256
    "SHA256:6QQmYi4ppFS4/+zSZ5S4IU+4sa6rwvQ4PbhCtPEBekQ", // RSA
];
120
121// ── Known-hosts parser for custom / GHE support ───────────────────────────────
122
123/// Parses a known-hosts file and returns all fingerprints for `hostname`.
124///
125/// Lines starting with `#` and blank lines are ignored. Each valid line has
126/// the form `hostname SHA256:<fp>`.
127///
128/// # Errors
129///
130/// Returns an error if the file cannot be read.
131fn fingerprints_from_known_hosts(path: &Path, hostname: &str) -> Result<Vec<String>, AnvilError> {
132    let content = std::fs::read_to_string(path)?;
133    let mut fps = Vec::new();
134
135    for line in content.lines() {
136        let line = line.trim();
137        if line.is_empty() || line.starts_with('#') {
138            continue;
139        }
140        let mut parts = line.splitn(2, ' ');
141        let Some(host_part) = parts.next() else {
142            continue;
143        };
144        let Some(fp_part) = parts.next() else {
145            continue;
146        };
147        if host_part == hostname {
148            fps.push(fp_part.trim().to_owned());
149        }
150    }
151
152    Ok(fps)
153}
154
155/// Returns the default known-hosts path: `~/.config/gitway/known_hosts`
156/// (or the platform-equivalent `dirs::config_dir()` location).
157///
158/// Returns `None` when `dirs::config_dir()` cannot resolve a config
159/// directory (extremely rare — typically only on misconfigured CI
160/// runners with no `HOME` / `XDG_CONFIG_HOME` and no fallback).
161///
162/// Promoted from crate-private to public in M19 (PRD §5.8.8) so the
163/// `gitway hosts` subcommand family can target the same path the
164/// rest of Anvil reads from by default.
165#[must_use]
166pub fn default_known_hosts_path() -> Option<std::path::PathBuf> {
167    dirs::config_dir().map(|d| d.join("gitway").join("known_hosts"))
168}
169
170// ── Public verifier ───────────────────────────────────────────────────────────
171
172/// Collects all expected fingerprints for `host`.
173///
174/// For well-known hosts (GitHub, GitLab, Codeberg and their fallback
175/// hostnames) the embedded fingerprint set is returned.  For any other host
176/// the custom known-hosts file is consulted; if it provides entries those are
177/// used, otherwise the connection is refused with an actionable error.
178///
179/// # Errors
180///
181/// Returns an error if `custom_path` is specified but cannot be read, or if
182/// no fingerprints can be found for the given host.
183pub fn fingerprints_for_host(
184    host: &str,
185    custom_path: &Option<std::path::PathBuf>,
186) -> Result<Vec<String>, AnvilError> {
187    // Start with the embedded set for the well-known hosted services.
188    let mut fps: Vec<String> = match host {
189        "github.com" | "ssh.github.com" => {
190            GITHUB_FINGERPRINTS.iter().map(|&s| s.to_owned()).collect()
191        }
192        "gitlab.com" | "altssh.gitlab.com" => {
193            GITLAB_FINGERPRINTS.iter().map(|&s| s.to_owned()).collect()
194        }
195        "codeberg.org" => CODEBERG_FINGERPRINTS
196            .iter()
197            .map(|&s| s.to_owned())
198            .collect(),
199        _ => Vec::new(),
200    };
201
202    // Consult the known-hosts file (user-supplied path or the default location)
203    // to allow custom / self-hosted instances and to let users extend or
204    // override the embedded sets.
205    let known_hosts_path = custom_path.clone().or_else(default_known_hosts_path);
206
207    if let Some(ref path) = known_hosts_path {
208        if path.exists() {
209            let extras = fingerprints_from_known_hosts(path, host)?;
210            fps.extend(extras);
211        }
212    }
213
214    // No fingerprints at all → refuse the connection with a clear message.
215    if fps.is_empty() {
216        return Err(
217            AnvilError::invalid_config(format!("no fingerprints known for host '{host}'"))
218                .with_hint(format!(
219                    "Gitway refuses to connect to hosts whose SSH fingerprint it can't \
220             verify (no trust-on-first-use). Either you typed the hostname \
221             wrong, or this is a self-hosted server and you need to pin its \
222             fingerprint: fetch it from the provider's docs (GitHub, GitLab, \
223             Codeberg publish them) and append one line to \
224             ~/.config/gitway/known_hosts:\n\
225             \n\
226                 {host} SHA256:<base64-fingerprint>\n\
227             \n\
228             As a last resort, re-run with --insecure-skip-host-check (not \
229             recommended — this disables MITM protection)."
230                )),
231        );
232    }
233
234    Ok(fps)
235}
236
237// ── M14: combined trust view (FR-60, FR-64) ──────────────────────────────────
238
/// Combined view of every `known_hosts` entry that bears on the
/// connection target.
///
/// Returned by [`host_key_trust`].  A connection target's effective
/// trust is the union of:
///
/// - `fingerprints` — direct SHA-256 pins (embedded + custom-file).
///   Identical to what [`fingerprints_for_host`] returns.
/// - `cert_authorities` — `@cert-authority` entries whose host pattern
///   matches the target.  Live cert verification (FR-61, FR-62, FR-63)
///   is deferred until russh exposes the server's certificate; the
///   field is populated today so `gitway config show --json` and
///   audit tooling can surface CA identities.
/// - `revoked` — `@revoked` entries whose host pattern matches.
///   Enforced first in
///   [`crate::session::AnvilSession::connect`]'s host-key check: any
///   presented key whose fingerprint hits one of these is rejected
///   regardless of `StrictHostKeyChecking` policy.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct HostKeyTrust {
    /// Direct pins for the target: the embedded set first, followed by the
    /// fingerprint of each direct `known_hosts` entry whose host pattern
    /// matched, in file order.
    pub fingerprints: Vec<String>,
    /// `@cert-authority` entries whose host pattern matched the target,
    /// in file order.
    pub cert_authorities: Vec<CertAuthority>,
    /// `@revoked` entries whose host pattern matched the target, in file
    /// order.
    pub revoked: Vec<RevokedEntry>,
}
263
264/// Returns the [`HostKeyTrust`] for `host`, combining the embedded
265/// fingerprint set, any direct pins / `@cert-authority` / `@revoked`
266/// lines from the user-supplied or default `known_hosts` file, and
267/// pattern-matching for the cert-authority + revoked classes.
268///
269/// Unlike [`fingerprints_for_host`], an empty trust set is **not** an
270/// error — the caller decides whether the absence is fatal (the
271/// `StrictHostKeyChecking::AcceptNew` path tolerates an empty set; the
272/// `Yes` path does not).
273///
274/// # Errors
275/// [`AnvilError::invalid_config`] when the known-hosts file exists but
276/// fails to parse (a malformed `@cert-authority` line, for instance).
277/// File-not-found is silently treated as no entries.
278pub fn host_key_trust(
279    host: &str,
280    custom_path: &Option<std::path::PathBuf>,
281) -> Result<HostKeyTrust, AnvilError> {
282    let mut trust = HostKeyTrust {
283        fingerprints: embedded_fingerprints(host),
284        cert_authorities: Vec::new(),
285        revoked: Vec::new(),
286    };
287
288    let known_hosts_path = custom_path.clone().or_else(default_known_hosts_path);
289    let Some(path) = known_hosts_path else {
290        return Ok(trust);
291    };
292    if !path.exists() {
293        return Ok(trust);
294    }
295
296    let content = std::fs::read_to_string(&path).map_err(|e| {
297        AnvilError::invalid_config(format!(
298            "could not read known_hosts {}: {e}",
299            path.display(),
300        ))
301    })?;
302    let parsed: KnownHostsFile = parse_known_hosts(&content)?;
303
304    for direct in parsed.direct {
305        if wildcard_match(&direct.host_pattern, host) {
306            trust.fingerprints.push(direct.fingerprint);
307        }
308    }
309    for ca in parsed.cert_authorities {
310        if wildcard_match(&ca.host_pattern, host) {
311            trust.cert_authorities.push(ca);
312        }
313    }
314    for rev in parsed.revoked {
315        if wildcard_match(&rev.host_pattern, host) {
316            trust.revoked.push(rev);
317        }
318    }
319
320    Ok(trust)
321}
322
323/// Returns the embedded SHA-256 fingerprints for the listed
324/// well-known hosts.  Internal helper used by both
325/// [`fingerprints_for_host`] and [`host_key_trust`].
326fn embedded_fingerprints(host: &str) -> Vec<String> {
327    match host {
328        "github.com" | "ssh.github.com" => {
329            GITHUB_FINGERPRINTS.iter().map(|&s| s.to_owned()).collect()
330        }
331        "gitlab.com" | "altssh.gitlab.com" => {
332            GITLAB_FINGERPRINTS.iter().map(|&s| s.to_owned()).collect()
333        }
334        "codeberg.org" => CODEBERG_FINGERPRINTS
335            .iter()
336            .map(|&s| s.to_owned())
337            .collect(),
338        _ => Vec::new(),
339    }
340}
341
342/// Appends `host SHA256:<fingerprint>` as a new plaintext line to
343/// the `known_hosts` file at `path`, creating the file (and any
344/// missing parent directories) if needed.
345///
346/// Promoted from crate-private to public in M19 (PRD §5.8.8 FR-85)
347/// so the `gitway hosts add` verb can drive the write side without a
348/// re-export shim.  Used internally by
349/// [`crate::ssh_config::StrictHostKeyChecking::AcceptNew`] for the
350/// first-connection TOFU path.
351///
352/// File locking and duplicate-detection are deferred to a post-M19
353/// polish pass — see PRD §5.8.8 risks.
354///
355/// # Errors
356///
357/// Returns an error if the parent directory cannot be created, the
358/// file cannot be opened for append, or the write fails.
359pub fn append_known_host(path: &Path, host: &str, fingerprint: &str) -> Result<(), AnvilError> {
360    use std::io::Write;
361
362    ensure_parent_exists(path)?;
363
364    let line = format!("{host} {fingerprint}\n");
365    let mut file = std::fs::OpenOptions::new()
366        .append(true)
367        .create(true)
368        .open(path)
369        .map_err(|e| {
370            AnvilError::invalid_config(format!(
371                "could not open known_hosts {} for append: {e}",
372                path.display(),
373            ))
374        })?;
375    file.write_all(line.as_bytes()).map_err(|e| {
376        AnvilError::invalid_config(format!(
377            "could not write to known_hosts {}: {e}",
378            path.display(),
379        ))
380    })?;
381
382    Ok(())
383}
384
385/// Appends `|1|<base64-salt>|<base64-hmac-sha1> SHA256:<fingerprint>`
386/// to the `known_hosts` file at `path`, generating a fresh 20-byte
387/// random salt for this entry.
388///
389/// This is the M19 (PRD §5.8.8 FR-84) write-side counterpart to
390/// [`crate::cert_authority::HashedHost::matches`].  The encoding is
391/// bit-for-bit identical to what `ssh-keygen -H` would write — see
392/// the `tests/test_hostkey_writes.rs` round-trip test that proves it
393/// re-parses through [`crate::cert_authority::parse_known_hosts`] +
394/// [`crate::cert_authority::HashedHost::matches(host)`] cleanly.
395///
396/// `host` is what gets HMAC-SHA1'd; pass exactly the hostname the
397/// caller wants the hash to match (no implicit lower-casing — that
398/// policy lives in the caller, mirroring OpenSSH's
399/// `hostfile.c::lowercase` flag handling).
400///
401/// # Errors
402///
403/// Returns an error if the parent directory cannot be created, the
404/// file cannot be opened for append, or the write fails.
405pub fn append_known_host_hashed(
406    path: &Path,
407    host: &str,
408    fingerprint: &str,
409) -> Result<(), AnvilError> {
410    use std::io::Write;
411
412    ensure_parent_exists(path)?;
413
414    // Fresh 20-byte salt per entry, sourced from the OS RNG.
415    let mut salt = [0u8; 20];
416    OsRng.fill_bytes(&mut salt);
417
418    let mut mac = <Hmac<Sha1>>::new_from_slice(&salt).map_err(|_e| {
419        // `_e` is the InvalidLength variant; HMAC-SHA1 does not
420        // enforce key-length restrictions in practice, so this
421        // branch is effectively dead.  Discarded by design.
422        AnvilError::invalid_config(
423            "HMAC-SHA1 init failed unexpectedly; refusing to write hashed entry".to_owned(),
424        )
425    })?;
426    mac.update(host.as_bytes());
427    let hash = mac.finalize().into_bytes();
428
429    let line = format!(
430        "|1|{}|{} {fingerprint}\n",
431        BASE64.encode(salt),
432        BASE64.encode(hash.as_slice()),
433    );
434    let mut file = std::fs::OpenOptions::new()
435        .append(true)
436        .create(true)
437        .open(path)
438        .map_err(|e| {
439            AnvilError::invalid_config(format!(
440                "could not open known_hosts {} for append: {e}",
441                path.display(),
442            ))
443        })?;
444    file.write_all(line.as_bytes()).map_err(|e| {
445        AnvilError::invalid_config(format!(
446            "could not write to known_hosts {}: {e}",
447            path.display(),
448        ))
449    })?;
450
451    Ok(())
452}
453
454/// Prepends `@revoked <host_pattern> <fingerprint>` to the
455/// `known_hosts` file at `path`, atomically via a sibling tempfile +
456/// rename.  Creates the file (and missing parents) if it does not
457/// yet exist.
458///
459/// M19 (PRD §5.8.8 FR-86): the `@revoked` line is written **first**
460/// in the file so it surfaces ahead of any direct pin during
461/// human inspection.  The trust-merger ([`host_key_trust`])
462/// already treats `@revoked` as a hard reject regardless of position,
463/// so the prepend is purely a readability convention.
464///
465/// # Atomicity
466///
467/// Reads the existing file into memory (capped at 1 MiB), prepends
468/// the new line, writes to `<path>.tmp.<random>`, then
469/// [`std::fs::rename`] over the original.  POSIX `rename` is atomic
470/// within a filesystem; on Windows, `MoveFileEx` with
471/// `MOVEFILE_REPLACE_EXISTING` is the closest equivalent and is what
472/// `std::fs::rename` uses.  A crash mid-rename leaves either the old
473/// file or the new one — never a torn write.
474///
475/// # Errors
476///
477/// Returns an error if the file is larger than 1 MiB, the parent
478/// directory cannot be created, the tempfile cannot be opened, or
479/// the rename fails.
480pub fn prepend_revoked(
481    path: &Path,
482    host_pattern: &str,
483    fingerprint: &str,
484) -> Result<(), AnvilError> {
485    use std::io::Write;
486
487    const MAX_FILE_BYTES: u64 = 1024 * 1024;
488
489    ensure_parent_exists(path)?;
490
491    // Read the existing file (or treat missing as empty).
492    let existing: Vec<u8> = if path.exists() {
493        let metadata = std::fs::metadata(path).map_err(|e| {
494            AnvilError::invalid_config(format!(
495                "could not stat known_hosts {} for revoke: {e}",
496                path.display(),
497            ))
498        })?;
499        if metadata.len() > MAX_FILE_BYTES {
500            return Err(AnvilError::invalid_config(format!(
501                "known_hosts {} is larger than {MAX_FILE_BYTES} bytes; refusing to load \
502                 entire file into memory for revoke. Split the file or pass --known-hosts \
503                 to point at a smaller one.",
504                path.display(),
505            )));
506        }
507        std::fs::read(path).map_err(|e| {
508            AnvilError::invalid_config(format!(
509                "could not read known_hosts {} for revoke: {e}",
510                path.display(),
511            ))
512        })?
513    } else {
514        Vec::new()
515    };
516
517    // Build the temp path with a random suffix so concurrent revokes
518    // don't collide on the same temp name.
519    let mut suffix_bytes = [0u8; 8];
520    OsRng.fill_bytes(&mut suffix_bytes);
521    let suffix = BASE64
522        .encode(suffix_bytes)
523        .replace('/', "_")
524        .replace('+', "-");
525    let tmp_path = path.with_extension(format!("revoke.{suffix}.tmp"));
526
527    let mut tmp = std::fs::OpenOptions::new()
528        .write(true)
529        .create_new(true)
530        .open(&tmp_path)
531        .map_err(|e| {
532            AnvilError::invalid_config(format!(
533                "could not create temp file {} for revoke: {e}",
534                tmp_path.display(),
535            ))
536        })?;
537
538    let new_line = format!("@revoked {host_pattern} {fingerprint}\n");
539    tmp.write_all(new_line.as_bytes())
540        .map_err(|e| AnvilError::invalid_config(format!("could not write revoke header: {e}")))?;
541    tmp.write_all(&existing).map_err(|e| {
542        AnvilError::invalid_config(format!("could not copy existing known_hosts contents: {e}"))
543    })?;
544    tmp.sync_all().map_err(|e| {
545        AnvilError::invalid_config(format!("could not fsync temp file before rename: {e}"))
546    })?;
547    drop(tmp);
548
549    std::fs::rename(&tmp_path, path).map_err(|e| {
550        // Best-effort cleanup of the orphaned tempfile; ignore the
551        // result because we're already in an error path.
552        let _ = std::fs::remove_file(&tmp_path);
553        AnvilError::invalid_config(format!(
554            "could not rename {} -> {}: {e}",
555            tmp_path.display(),
556            path.display(),
557        ))
558    })?;
559
560    Ok(())
561}
562
563/// Returns the embedded fingerprint catalogue as `(host, fingerprint,
564/// algorithm)` triples for surfacing in `gitway hosts list`.
565///
566/// The algorithm tag is one of `"ed25519"`, `"ecdsa"`, `"rsa"` —
567/// matches the per-index ordering inside [`GITHUB_FINGERPRINTS`],
568/// [`GITLAB_FINGERPRINTS`], and [`CODEBERG_FINGERPRINTS`].
569#[must_use]
570pub fn all_embedded() -> Vec<(String, String, &'static str)> {
571    const ALGS: [&str; 3] = ["ed25519", "ecdsa", "rsa"];
572    let mut out = Vec::with_capacity(9);
573    for (host, fps) in [
574        ("github.com", GITHUB_FINGERPRINTS),
575        ("gitlab.com", GITLAB_FINGERPRINTS),
576        ("codeberg.org", CODEBERG_FINGERPRINTS),
577    ] {
578        for (idx, fp) in fps.iter().enumerate() {
579            let alg = ALGS.get(idx).copied().unwrap_or("unknown");
580            out.push((host.to_owned(), (*fp).to_owned(), alg));
581        }
582    }
583    out
584}
585
/// Per-file format detected by [`detect_hash_mode`].  Drives whether
/// `gitway hosts add` should emit a hashed or plaintext entry by
/// default.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HashMode {
    /// File does not exist, or contains no direct host lines — only
    /// blank lines, `#` comments, and `@`-marker lines.
    Empty,
    /// At least one direct line is present and none of them starts with
    /// the hashed `|1|` prefix (i.e. the plaintext `host SHA256:fp`
    /// shape).  New entries default to plaintext.
    Plaintext,
    /// At least one direct line uses the `|1|salt|hash SHA256:fp`
    /// shape.  A single hashed line is enough, even when plaintext
    /// lines are also present.  New entries default to hashed.
    Hashed,
}
601
602/// Inspects the existing `known_hosts` file at `path` and decides
603/// whether new entries should be hashed (matches OpenSSH's
604/// `HashKnownHosts yes` behaviour) or plaintext.
605///
606/// - Returns [`HashMode::Empty`] if the file does not exist or is
607///   empty / contains only comments + `@`-marker lines.
608/// - Returns [`HashMode::Hashed`] if **any** non-comment direct line
609///   starts with `|1|` (matches OpenSSH's `_ssh_host_hashed_p` check).
610/// - Returns [`HashMode::Plaintext`] otherwise.
611///
612/// Cheap — reads the file once line-by-line and short-circuits on
613/// the first hashed token seen.
614///
615/// # Errors
616///
617/// Returns an error only if the file exists but cannot be read.
618pub fn detect_hash_mode(path: &Path) -> Result<HashMode, AnvilError> {
619    if !path.exists() {
620        return Ok(HashMode::Empty);
621    }
622    let content = std::fs::read_to_string(path).map_err(|e| {
623        AnvilError::invalid_config(format!(
624            "could not read known_hosts {} for hash-mode detect: {e}",
625            path.display(),
626        ))
627    })?;
628    let mut saw_plaintext = false;
629    for raw in content.lines() {
630        let line = raw.trim();
631        if line.is_empty() || line.starts_with('#') || line.starts_with('@') {
632            continue;
633        }
634        // Direct line.  Inspect the first whitespace-delimited token.
635        let host_token = line.split_whitespace().next().unwrap_or("");
636        if host_token.starts_with("|1|") {
637            return Ok(HashMode::Hashed);
638        }
639        saw_plaintext = true;
640    }
641    if saw_plaintext {
642        Ok(HashMode::Plaintext)
643    } else {
644        Ok(HashMode::Empty)
645    }
646}
647
648/// Internal helper — `mkdir -p` for the parent of `path`.  Used by
649/// every M19 writer so they share the same error-message shape.
650fn ensure_parent_exists(path: &Path) -> Result<(), AnvilError> {
651    if let Some(parent) = path.parent() {
652        if !parent.as_os_str().is_empty() {
653            std::fs::create_dir_all(parent).map_err(|e| {
654                AnvilError::invalid_config(format!(
655                    "could not create known_hosts parent {}: {e}",
656                    parent.display(),
657                ))
658            })?;
659        }
660    }
661    Ok(())
662}
663
664// ── Tests ─────────────────────────────────────────────────────────────────────
665
#[cfg(test)]
mod tests {
    use super::*;

    /// A custom known-hosts path that is expected not to exist.
    ///
    /// Passing this instead of `None` keeps the exact-count assertions
    /// below independent of whatever the developer has in their real
    /// default `known_hosts` file.
    fn no_known_hosts() -> Option<std::path::PathBuf> {
        Some(std::path::PathBuf::from("/this/path/does/not/exist"))
    }

    #[test]
    fn github_com_returns_three_fingerprints() {
        let fps = fingerprints_for_host("github.com", &no_known_hosts()).unwrap();
        assert_eq!(fps.len(), 3);
    }

    #[test]
    fn ssh_github_com_returns_same_fingerprints() {
        let fps = fingerprints_for_host("ssh.github.com", &no_known_hosts()).unwrap();
        assert_eq!(fps.len(), 3);
    }

    #[test]
    fn gitlab_com_returns_three_fingerprints() {
        let fps = fingerprints_for_host("gitlab.com", &no_known_hosts()).unwrap();
        assert_eq!(fps.len(), 3);
    }

    #[test]
    fn altssh_gitlab_com_returns_same_fingerprints_as_gitlab() {
        let primary = fingerprints_for_host("gitlab.com", &no_known_hosts()).unwrap();
        let fallback = fingerprints_for_host("altssh.gitlab.com", &no_known_hosts()).unwrap();
        assert_eq!(primary, fallback);
    }

    #[test]
    fn codeberg_org_returns_three_fingerprints() {
        let fps = fingerprints_for_host("codeberg.org", &no_known_hosts()).unwrap();
        assert_eq!(fps.len(), 3);
    }

    /// Smoke test for the `None` → default-path branch.  Only asserts a
    /// property that holds whatever extra pins the local default file adds.
    #[test]
    fn default_path_lookup_keeps_embedded_fingerprints() {
        let fps = fingerprints_for_host("github.com", &None).unwrap();
        for expected in GITHUB_FINGERPRINTS {
            assert!(
                fps.iter().any(|got| got.as_str() == *expected),
                "embedded fingerprint missing: {expected}"
            );
        }
    }

    #[test]
    fn all_github_fingerprints_start_with_sha256_prefix() {
        for fp in GITHUB_FINGERPRINTS {
            assert!(fp.starts_with("SHA256:"), "malformed fingerprint: {fp}");
        }
    }

    #[test]
    fn all_gitlab_fingerprints_start_with_sha256_prefix() {
        for fp in GITLAB_FINGERPRINTS {
            assert!(fp.starts_with("SHA256:"), "malformed fingerprint: {fp}");
        }
    }

    #[test]
    fn all_codeberg_fingerprints_start_with_sha256_prefix() {
        for fp in CODEBERG_FINGERPRINTS {
            assert!(fp.starts_with("SHA256:"), "malformed fingerprint: {fp}");
        }
    }

    #[test]
    fn unknown_host_without_known_hosts_is_error() {
        let result = fingerprints_for_host("git.example.com", &no_known_hosts());
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(err.to_string().contains("git.example.com"));
    }

    // ── M14: host_key_trust ──────────────────────────────────────────────────

    /// Helper: write `content` to a fresh temp file and return its path.
    /// The returned `TempDir` guard must stay alive for as long as the
    /// path is used; dropping it deletes the directory.
    fn write_known_hosts(content: &str) -> (tempfile::TempDir, std::path::PathBuf) {
        let dir = tempfile::tempdir().expect("tempdir");
        let path = dir.path().join("known_hosts");
        std::fs::write(&path, content).expect("write");
        (dir, path)
    }

    #[test]
    fn host_key_trust_embeds_well_known_fingerprints() {
        let trust = host_key_trust("github.com", &no_known_hosts()).expect("trust");
        assert_eq!(trust.fingerprints.len(), 3);
        assert!(trust.cert_authorities.is_empty());
        assert!(trust.revoked.is_empty());
    }

    #[test]
    fn host_key_trust_pattern_matches_cert_authority() {
        let (_g, path) = write_known_hosts(
            "@cert-authority *.example.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILM+rvN+ot98qgEN796jTiQfZfG1KaT0PtFDJ/XFSqti ca\n",
        );
        let trust = host_key_trust("foo.example.com", &Some(path)).expect("trust");
        assert_eq!(trust.cert_authorities.len(), 1);
        assert_eq!(trust.cert_authorities[0].host_pattern, "*.example.com");
    }

    #[test]
    fn host_key_trust_pattern_excludes_non_match() {
        let (_g, path) = write_known_hosts(
            "@cert-authority *.example.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILM+rvN+ot98qgEN796jTiQfZfG1KaT0PtFDJ/XFSqti ca\n",
        );
        let trust = host_key_trust("other.org", &Some(path)).expect("trust");
        assert!(trust.cert_authorities.is_empty());
    }

    #[test]
    fn host_key_trust_revoked_pattern_matches() {
        let (_g, path) = write_known_hosts(
            "@revoked *.example.com SHA256:revokedfp\n\
             @revoked unrelated.com SHA256:other\n",
        );
        let trust = host_key_trust("foo.example.com", &Some(path)).expect("trust");
        assert_eq!(trust.revoked.len(), 1);
        assert_eq!(trust.revoked[0].fingerprint, "SHA256:revokedfp");
    }

    #[test]
    fn host_key_trust_combines_direct_and_embedded() {
        let (_g, path) = write_known_hosts("github.com SHA256:extra-pin\n");
        let trust = host_key_trust("github.com", &Some(path)).expect("trust");
        // Three embedded + one extra direct.
        assert_eq!(trust.fingerprints.len(), 4);
        assert!(trust.fingerprints.contains(&"SHA256:extra-pin".to_owned()));
    }

    #[test]
    fn host_key_trust_missing_file_returns_embedded_only() {
        let trust = host_key_trust(
            "github.com",
            &Some(std::path::PathBuf::from("/this/path/does/not/exist")),
        )
        .expect("trust");
        assert_eq!(trust.fingerprints.len(), 3);
        assert!(trust.cert_authorities.is_empty());
        assert!(trust.revoked.is_empty());
    }

    #[test]
    fn host_key_trust_empty_for_unknown_host_no_file() {
        // Unlike `fingerprints_for_host`, `host_key_trust` does NOT
        // error on an empty trust set — that is the caller's policy
        // call.  This is the path the AcceptNew policy relies on.
        let trust = host_key_trust("git.example.com", &no_known_hosts()).expect("trust");
        assert!(trust.fingerprints.is_empty());
        assert!(trust.cert_authorities.is_empty());
        assert!(trust.revoked.is_empty());
    }
}
809}