nornir 0.4.13

Companion to cargo: dependency tracking, release gating, deploy, benchmarks, and documentation assembly. Project-agnostic.
Documentation
//! Pure-Rust git-over-SSH (via `russh`) — **no `ssh` subprocess**, honoring
//! nornir's no-shellout rule (gix's own SSH transport execs `ssh`, which is why
//! [`crate::gitio`] refuses SSH and defers here).
//!
//! Implemented:
//! - **`ls-remote`** — connect, authenticate with the nornir ed25519 deploy key,
//!   run `git-upload-pack '<path>'`, and parse the server's ref advertisement
//!   (pkt-line) into `(sha, refname)` pairs. The cheap poll primitive.
//! - **pack transfer (fetch/clone)** — [`connect_upload_pack`] execs
//!   `git-upload-pack` and exposes the channel as blocking `Read`/`Write`
//!   (bridged from russh's async channel via `tokio_util`'s `SyncIoBridge`).
//!   [`crate::gitio`] hands that to gix's blocking git transport
//!   (`ConnectMode::Process`), so gix drives the full object negotiation and
//!   pack indexing — no `ssh`/`git` subprocess, no hand-rolled packfile code.
//!
//! Not yet: `git-receive-pack` (SSH **push**). The URL- and pkt-line parsers
//! below are transport-agnostic and unit-tested.

use std::path::Path;
use std::sync::Arc;

use anyhow::{anyhow, bail, Context, Result};
use tokio::io::{ReadHalf, WriteHalf};
use tokio_util::io::SyncIoBridge;

/// A parsed SSH git location.
///
/// Produced by [`parse_ssh_url`]. The connection helpers in this module dial
/// `host:port`, authenticate as `user`, and hand `path` to the remote
/// `git-upload-pack` command.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SshLocation {
    /// Login name presented during SSH public-key authentication.
    pub user: String,
    /// Host name or address of the SSH server.
    pub host: String,
    /// TCP port of the SSH server.
    pub port: u16,
    /// Repository path as the remote `git-upload-pack` expects it.
    pub path: String,
}

/// Parse `[user@]host:path` (scp-like, e.g. `git@host:owner/repo.git`) or
/// `ssh://[user@]host[:port]/path` into an [`SshLocation`]. Defaults: user
/// `git`, port `22`.
///
/// The parser follows git's own URL rules where they affect the result:
/// - A bracketed IPv6 literal (`[::1]`) is accepted as the host in both forms
///   and stored **without** the brackets, ready to be used as a socket address.
/// - `ssh://host/~user/repo` yields the path `~user/repo` (no leading `/`), so
///   the remote can resolve it relative to a home directory.
/// - The scp-like form is only recognised when no `/` appears before the first
///   `:`; strings such as `./dir:name` are local paths, not SSH locations.
///
/// # Errors
/// Fails when the string uses another scheme, lacks a path, has an empty host
/// or path, has a malformed bracketed host, or has a port that is not a `u16`.
pub fn parse_ssh_url(url: &str) -> Result<SshLocation> {
    if let Some(rest) = url.strip_prefix("ssh://") {
        // ssh://[user@]host[:port]/path
        let (authority, path) = rest
            .split_once('/')
            .ok_or_else(|| anyhow!("ssh url `{url}` has no path"))?;
        let (user, hostport) = match authority.split_once('@') {
            Some((u, hp)) => (u.to_string(), hp),
            None => ("git".to_string(), authority),
        };
        // An IPv6 literal contains colons itself, so it must be bracketed and
        // the optional port follows the closing bracket.
        let (host, port_text) = match hostport.strip_prefix('[') {
            Some(inner) => {
                let (addr, tail) = inner
                    .split_once(']')
                    .ok_or_else(|| anyhow!("ssh url `{url}` has malformed bracketed host"))?;
                match tail.strip_prefix(':') {
                    Some(p) => (addr, Some(p)),
                    None if tail.is_empty() => (addr, None),
                    None => bail!("ssh url `{url}` has malformed bracketed host"),
                }
            }
            None => match hostport.split_once(':') {
                Some((h, p)) => (h, Some(p)),
                None => (hostport, None),
            },
        };
        let port = match port_text {
            Some(p) => p
                .parse::<u16>()
                .with_context(|| format!("ssh url `{url}` bad port"))?,
            None => 22,
        };
        if host.is_empty() {
            bail!("ssh url `{url}` has empty host");
        }
        // git drops the slash in front of `~` so `/~user/repo` reaches the
        // remote as a home-relative path; every other path stays absolute.
        let path = if path.starts_with('~') {
            path.to_string()
        } else {
            format!("/{path}")
        };
        return Ok(SshLocation {
            user,
            host: host.to_string(),
            port,
            path,
        });
    }
    // Any other explicit scheme (http://, https://, git://) is not SSH.
    if url.contains("://") {
        bail!("`{url}` is not an SSH url (expected host:path or ssh://…)");
    }
    // scp-like: [user@]host:path
    let (head, _) = url
        .split_once(':')
        .ok_or_else(|| anyhow!("`{url}` is not an SSH url (expected host:path or ssh://…)"))?;
    // A slash before the first colon means this is a local path that merely
    // contains a colon, not a host.
    if head.contains('/') {
        bail!("`{url}` is not an SSH url (expected host:path or ssh://…)");
    }
    // The user is whatever precedes the first `@` that comes before the first
    // colon; `rest` is everything after it (`host:path` or `[v6]:path`).
    let (user, rest) = match head.split_once('@') {
        Some((u, _)) => (u.to_string(), &url[u.len() + 1..]),
        None => ("git".to_string(), url),
    };
    let (host, path) = match rest.strip_prefix('[') {
        // Bracketed IPv6 literal: the separator is the colon after `]`.
        Some(inner) => inner.split_once("]:"),
        // Plain host: `rest` is known to contain a colon at this point.
        None => rest.split_once(':'),
    }
    .ok_or_else(|| anyhow!("ssh url `{url}` has malformed bracketed host"))?;
    if host.is_empty() || path.is_empty() {
        bail!("ssh url `{url}` has empty host or path");
    }
    Ok(SshLocation {
        user,
        host: host.to_string(),
        port: 22,
        path: path.to_string(),
    })
}

/// Parse a git smart-protocol **ref advertisement** (the bytes a server emits
/// right after `git-upload-pack`) into `(sha, refname)` pairs.
///
/// pkt-line framing: each line is 4 hex digits of length (counting the 4) then
/// the payload; `0000` is a flush. The first ref line carries capabilities
/// after a NUL. A leading `# service=…` line (smart-HTTP style) is skipped.
pub fn parse_ref_advertisement(mut buf: &[u8]) -> Result<Vec<(String, String)>> {
    let mut refs = Vec::new();
    loop {
        if buf.len() < 4 {
            break;
        }
        let len_hex = std::str::from_utf8(&buf[..4]).context("pkt-line length not utf8")?;
        let len = usize::from_str_radix(len_hex, 16)
            .with_context(|| format!("pkt-line length `{len_hex}` not hex"))?;
        if len == 0 {
            // flush-pkt — end of the advertisement (first section).
            break;
        }
        if len < 4 || len > buf.len() {
            bail!("pkt-line length {len} out of range (have {} bytes)", buf.len());
        }
        let payload = &buf[4..len];
        buf = &buf[len..];

        // Drop a trailing newline, then split off capabilities after a NUL.
        let line = payload.strip_suffix(b"\n").unwrap_or(payload);
        let line = line.split(|&b| b == 0).next().unwrap_or(line);
        let text = std::str::from_utf8(line).context("ref line not utf8")?;
        if text.starts_with("# service=") {
            continue;
        }
        if let Some((sha, name)) = text.split_once(' ') {
            if sha.len() == 40 && sha.bytes().all(|b| b.is_ascii_hexdigit()) {
                refs.push((sha.to_string(), name.to_string()));
            }
        }
    }
    Ok(refs)
}

// ── russh client ────────────────────────────────────────────────────────────

/// russh client event handler passed to `russh::client::connect` by this
/// module. It carries no state; the only callback it customises is the
/// host-key check below.
struct Client;

impl russh::client::Handler for Client {
    type Error = russh::Error;

    // We trust the host key (deploy-time TOFU is out of scope here; the bearer
    // of the deploy key already gates access). Returning Ok(true) accepts it.
    //
    // NOTE(review): because every key is accepted, the server's identity is
    // never verified. Presumably acceptable per the rationale above, but a
    // spoofed host could then serve arbitrary refs — confirm this trade-off.
    async fn check_server_key(
        &mut self,
        _server_public_key: &russh::keys::ssh_key::PublicKey,
    ) -> Result<bool, Self::Error> {
        Ok(true)
    }
}

/// Connect to `loc` and authenticate with the OpenSSH private key at `key_path`.
/// Shared by [`ls_remote`] and [`connect_upload_pack`]. The returned session
/// handle owns the connection's I/O task — keep it alive while the channel is in
/// use.
async fn connect_session(
    loc: &SshLocation,
    key_path: &Path,
) -> Result<russh::client::Handle<Client>> {
    use russh::keys::{PrivateKeyWithHashAlg, ssh_key::PrivateKey};

    let key_pem = std::fs::read_to_string(key_path)
        .with_context(|| format!("read ssh key {}", key_path.display()))?;
    let key = PrivateKey::from_openssh(&key_pem)
        .with_context(|| format!("parse OpenSSH key {}", key_path.display()))?;

    let config = Arc::new(russh::client::Config::default());
    let mut session = russh::client::connect(config, (loc.host.as_str(), loc.port), Client)
        .await
        .with_context(|| format!("ssh connect {}:{}", loc.host, loc.port))?;

    let auth = session
        .authenticate_publickey(&loc.user, PrivateKeyWithHashAlg::new(Arc::new(key), None))
        .await
        .context("ssh publickey auth")?;
    if !auth.success() {
        bail!("ssh publickey auth rejected for {}@{}", loc.user, loc.host);
    }
    Ok(session)
}

/// `ls-remote` over SSH: connect to `loc`, run `git-upload-pack`, and return the
/// advertised refs (reading only the initial ref advertisement — no negotiation).
pub async fn ls_remote(loc: &SshLocation, key_path: &Path) -> Result<Vec<(String, String)>> {
    let session = connect_session(loc, key_path).await?;
    let mut channel = session
        .channel_open_session()
        .await
        .context("ssh open session channel")?;
    // Quoting per git's scp-style transport.
    let cmd = format!("git-upload-pack '{}'", loc.path);
    channel.exec(true, cmd.as_bytes()).await.context("ssh exec git-upload-pack")?;

    // `git-upload-pack` sends the ref advertisement and then *waits for the
    // client's wants* — it never EOFs on its own. Send a flush-pkt ("I want
    // nothing") so it finishes and closes; otherwise the read loop below blocks
    // forever.
    channel
        .data(&b"0000"[..])
        .await
        .context("ssh send flush-pkt")?;

    let mut out: Vec<u8> = Vec::new();
    while let Some(msg) = channel.wait().await {
        match msg {
            russh::ChannelMsg::Data { ref data } => out.extend_from_slice(data),
            russh::ChannelMsg::Eof | russh::ChannelMsg::ExitStatus { .. } => break,
            _ => {}
        }
    }
    parse_ref_advertisement(&out)
}

/// Synchronous front end to [`ls_remote`] for the blocking fetch/poll path.
///
/// Parses `url` with [`parse_ssh_url`], builds a throwaway current-thread tokio
/// runtime, and drives the async call to completion on the calling thread.
pub fn ls_remote_blocking(url: &str, key_path: &Path) -> Result<Vec<(String, String)>> {
    let location = parse_ssh_url(url)?;

    let mut builder = tokio::runtime::Builder::new_current_thread();
    builder.enable_all();
    let runtime = builder
        .build()
        .context("build runtime for ssh ls-remote")?;

    let listing = ls_remote(&location, key_path);
    runtime.block_on(listing)
}

/// An authenticated `git-upload-pack` channel exposed as blocking `Read`/`Write`.
///
/// russh is async; gix's blocking git transport wants `std::io::{Read, Write}`.
/// We own a tokio runtime that drives the russh connection and bridge the
/// channel's two halves with [`SyncIoBridge`]. The runtime + session handle are
/// retained so the connection stays alive for the duration of the fetch; the
/// `reader`/`writer` are handed (by `&mut`) to a `gix` `git::Connection`.
///
/// Field order is significant: Rust drops struct fields in declaration order,
/// so the channel halves are declared first, then the session, and the runtime
/// **last**. That way the runtime is still alive while the channel and the
/// connection are being torn down.
///
/// IMPORTANT: the blocking `reader`/`writer` calls must run on a thread that is
/// **not** one of this runtime's workers (else `Handle::block_on` panics) — i.e.
/// drive gix's fetch on the calling thread, as [`crate::gitio`] does.
pub struct UploadPack {
    /// Blocking reader over the channel's stdout (the pack stream).
    pub reader: SyncIoBridge<ReadHalf<russh::ChannelStream<russh::client::Msg>>>,
    /// Blocking writer over the channel's stdin (want/have negotiation).
    pub writer: SyncIoBridge<WriteHalf<russh::ChannelStream<russh::client::Msg>>>,
    /// Connection handle; retained so the channel stays open during the fetch.
    _session: russh::client::Handle<Client>,
    /// Drives the russh connection; retained for liveness. Declared last so it
    /// is dropped last.
    _rt: tokio::runtime::Runtime,
}

/// Open an authenticated `git-upload-pack` channel to `loc` and return it as
/// blocking I/O for gix's transport. See [`UploadPack`].
///
/// Builds a dedicated multi-thread tokio runtime, connects and authenticates
/// with the key at `key_path`, requests git protocol v2 (best-effort), execs
/// `git-upload-pack` on the repository path, and wraps the channel's two halves
/// in [`SyncIoBridge`]s bound to that runtime.
///
/// # Errors
/// Fails when the runtime cannot be built or when connecting, authenticating,
/// opening the channel, or exec'ing the remote command fails.
pub fn connect_upload_pack(loc: &SshLocation, key_path: &Path) -> Result<UploadPack> {
    let rt = tokio::runtime::Builder::new_multi_thread()
        .enable_all()
        .build()
        .context("build runtime for ssh upload-pack")?;

    let (session, read_half, write_half) = rt.block_on(async {
        let session = connect_session(loc, key_path).await?;
        let channel = session
            .channel_open_session()
            .await
            .context("ssh open session channel")?;
        // Negotiate git protocol v2 (gix's v1 impl can deadlock). The server only
        // speaks v2 if this env is set before the command — GitHub honors it.
        // Best-effort: a server that ignores it falls back to v0, which gix's V2
        // client also handles. (ls-remote deliberately omits this and reads the
        // v0 advertisement.)
        channel.set_env(false, "GIT_PROTOCOL", "version=2").await.ok();

        // Quote the path the way git does for its SSH transport: wrap it in
        // single quotes and escape embedded `'` / `!` as `'\''` / `'\!'`, so a
        // path with such a character can neither break the command nor inject
        // another one.
        let mut cmd = String::from("git-upload-pack '");
        for ch in loc.path.chars() {
            if matches!(ch, '\'' | '!') {
                cmd.push_str("'\\");
                cmd.push(ch);
                cmd.push('\'');
            } else {
                cmd.push(ch);
            }
        }
        cmd.push('\'');
        channel
            .exec(true, cmd.as_bytes())
            .await
            .context("ssh exec git-upload-pack")?;
        let (r, w) = tokio::io::split(channel.into_stream());
        Ok::<_, anyhow::Error>((session, r, w))
    })?;

    // Both bridges block on the runtime owned by the returned value.
    let handle = rt.handle().clone();
    let reader = SyncIoBridge::new_with_handle(read_half, handle.clone());
    let writer = SyncIoBridge::new_with_handle(write_half, handle);
    Ok(UploadPack { _rt: rt, _session: session, reader, writer })
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Frame `body` as one pkt-line: four hex length digits (which count
    /// themselves) followed by the payload.
    fn pkt_line(body: &str) -> Vec<u8> {
        format!("{:04x}{body}", body.len() + 4).into_bytes()
    }

    #[test]
    fn parse_scp_like() {
        let parsed = parse_ssh_url("git@github.com:octocat/Hello-World.git").unwrap();
        let expected = SshLocation {
            user: "git".into(),
            host: "github.com".into(),
            port: 22,
            path: "octocat/Hello-World.git".into(),
        };
        assert_eq!(parsed, expected);
    }

    #[test]
    fn parse_ssh_scheme_with_port_and_user() {
        let parsed = parse_ssh_url("ssh://deploy@git.example.com:2222/srv/repos/foo.git").unwrap();
        let expected = SshLocation {
            user: "deploy".into(),
            host: "git.example.com".into(),
            port: 2222,
            path: "/srv/repos/foo.git".into(),
        };
        assert_eq!(parsed, expected);
    }

    #[test]
    fn reject_https() {
        let outcome = parse_ssh_url("https://github.com/octocat/Hello-World");
        assert!(outcome.is_err());
    }

    #[test]
    fn parse_advertisement() {
        // Two refs, the first with a NUL-separated capability list, then a flush.
        let head_sha = "7fd1a60b01f91b314f59955a4e4d4e80d8edf11d";
        let main_sha = "1111111111111111111111111111111111111111";
        let wire: Vec<u8> = [
            pkt_line(&format!("{head_sha} HEAD\0multi_ack symref=HEAD:refs/heads/main\n")),
            pkt_line(&format!("{main_sha} refs/heads/main\n")),
            b"0000".to_vec(),
        ]
        .concat();

        let refs = parse_ref_advertisement(&wire).unwrap();
        let expected = vec![
            (head_sha.to_string(), "HEAD".to_string()),
            (main_sha.to_string(), "refs/heads/main".to_string()),
        ];
        assert_eq!(refs, expected);
    }
}