cindy 0.2.0

Managing infrastructure at breakneck speed.
Documentation
//! Download a file over HTTP(S) to a path on the remote machine.
//!
//! Idempotent when a [`checksum`](State::checksum) is supplied: if the
//! destination already exists and its digest matches, the download is
//! skipped. Without a checksum the file is (re)fetched only when it's
//! missing, unless [`force`](State::force) is set.
//!
//! HTTP is performed with the pure-Rust [`ureq`] client over `rustls`
//! (no OpenSSL / system TLS), so the build stays free of C and dynamic
//! library dependencies.

use std::io::Read as _;
use std::path::PathBuf;

use sha2::Digest as _;

use crate as cindy;
use crate::Context;

/// A digest to verify a downloaded file against. Pairing the algorithm
/// with the expected hex string makes a fetch both *idempotent* (skip
/// when the on-disk file already matches) and *safe* (a corrupted or
/// tampered download is rejected).
///
/// The wrapped string is the expected digest in hexadecimal. It is
/// compared after trimming surrounding whitespace and lower-casing
/// ASCII letters, so `"ABCD…"` and `" abcd…\n"` are equivalent.
#[derive(Clone, PartialEq, Eq)]
#[crate::wire]
pub enum Checksum {
    /// Expected SHA-256 digest, as a hex string.
    Sha256(String),
    /// Expected SHA-512 digest, as a hex string.
    Sha512(String),
}

impl Checksum {
    /// The expected digest, normalised for comparison: surrounding
    /// whitespace is stripped and ASCII letters are lower-cased.
    fn expected(&self) -> String {
        // Both variants carry the same payload, so bind it with a single
        // irrefutable or-pattern; adding a variant later turns this into
        // a compile error rather than a silent fallthrough.
        let (Self::Sha256(raw) | Self::Sha512(raw)) = self;
        raw.trim().to_ascii_lowercase()
    }

    /// Hash `bytes` with this checksum's algorithm and return the
    /// digest as a lowercase hex string. The expected value stored in
    /// the variant is not consulted here.
    fn digest(&self, bytes: &[u8]) -> String {
        match self {
            Self::Sha256(_) => {
                let raw = sha2::Sha256::digest(bytes);
                hex(&raw)
            }
            Self::Sha512(_) => {
                let raw = sha2::Sha512::digest(bytes);
                hex(&raw)
            }
        }
    }

    /// Whether the digest of `bytes` equals the (normalised) expected
    /// digest.
    fn matches(&self, bytes: &[u8]) -> bool {
        let wanted = self.expected();
        let actual = self.digest(bytes);
        wanted == actual
    }
}

/// Render `bytes` as lowercase hexadecimal, two digits per byte and no
/// separators. An empty slice yields an empty string.
fn hex(bytes: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut out = String::with_capacity(bytes.len() * 2);
    for &byte in bytes {
        // High nibble first, then low nibble.
        out.push(char::from(DIGITS[usize::from(byte >> 4)]));
        out.push(char::from(DIGITS[usize::from(byte & 0x0f)]));
    }
    out
}

/// Desired state for [`fetch`]: what to download, where to put it, and
/// the ownership/mode the resulting file should have.
///
/// Every field other than `url` and `dest` is optional; the `Default`
/// value has no checksum, no explicit owner or mode, and `force` off.
#[derive(Clone, Default, PartialEq, Eq)]
#[crate::wire]
pub struct State {
    /// Source URL (`http`/`https`).
    pub url: String,
    /// Destination path on the remote.
    pub dest: PathBuf,
    /// Optional digest to verify the download against and to make the
    /// task idempotent. `None` ⇒ skip only when the file already exists.
    pub checksum: Option<Checksum>,
    /// Owning user for the downloaded file. `None` ⇒ leave as-is.
    pub user: Option<String>,
    /// Owning group for the downloaded file. `None` ⇒ leave as-is.
    pub group: Option<String>,
    /// Mode to set on the downloaded file. `None` ⇒ leave the OS default.
    pub mode: Option<super::path::Mode>,
    /// Re-download even if the destination already exists (and, with a
    /// checksum, already matches).
    pub force: bool,
}

/// Decide whether the download can be skipped: the destination is a
/// readable regular file and (its checksum matches, or no checksum was
/// given).
///
/// Returns `false` ("not satisfied, proceed") when `force` is set, when
/// the destination is missing, when it is anything other than a regular
/// file (directory, symlink, FIFO, device, …), or when it cannot be
/// read. The actual write goes through the `path` module, which handles
/// the transition from whatever is there now to a regular file.
fn already_satisfied(state: &State) -> bool {
    if state.force {
        return false;
    }

    // Inspect the path itself rather than opening it: `symlink_metadata`
    // does not follow symlinks, and never blocks on a FIFO or streams an
    // endless device the way reading the path would.
    let is_regular_file =
        std::fs::symlink_metadata(&state.dest).is_ok_and(|meta| meta.file_type().is_file());
    if !is_regular_file {
        return false;
    }

    match &state.checksum {
        // Only pull the contents into memory when there is a digest to
        // compare against.
        Some(sum) => std::fs::read(&state.dest).is_ok_and(|existing| sum.matches(&existing)),
        // No checksum: existence as a readable regular file is enough,
        // so just prove it can be opened.
        None => std::fs::File::open(&state.dest).is_ok(),
    }
}

/// Issue a `GET` for `url` and buffer the entire response body in
/// memory.
///
/// # Errors
///
/// Fails if the request itself fails or if reading the body fails; the
/// error context names the URL in both cases.
fn download(url: &str) -> crate::Result<Vec<u8>> {
    let request = ureq::get(url);
    let mut response = request.call().context(format!("GET {url} failed"))?;

    let mut bytes = Vec::new();
    let mut reader = response.body_mut().as_reader();
    reader
        .read_to_end(&mut bytes)
        .context(format!("Reading response body from {url} failed"))?;

    Ok(bytes)
}

/// Download `state.url` to `state.dest` on the remote, verifying the
/// checksum when one is given.
///
/// Returns `Unchanged` without touching the network when the
/// destination already satisfies the request. Otherwise the body is
/// fetched into memory, verified, and only then materialised on disk by
/// delegating to the [`path`](super::path) module — which is what gives
/// `fetch` file/dir/symlink transition handling, ownership, and the
/// same struct diff as every other builtin without reimplementing any
/// of it here.
///
/// # Errors
///
/// Fails if the download fails, if the downloaded bytes do not match
/// the supplied checksum, or if the `path` module reports an error.
#[crate::remote]
pub fn fetch(state: State) -> crate::Result<super::Return> {
    if already_satisfied(&state) {
        return Ok(super::Return::Unchanged);
    }

    eprintln!("fetch {} -> {}", state.url, state.dest.display());
    let body = download(&state.url)?;

    // Verify before anything is written, so a bad download never
    // reaches the destination.
    if let Some(sum) = &state.checksum {
        let expected = sum.expected();
        let actual = sum.digest(&body);
        if actual != expected {
            crate::bail!(
                "checksum mismatch for {}: expected {}, got {}",
                state.url,
                expected,
                actual,
            );
        }
    }

    // The `path` module owns the on-disk state-transition logic (and
    // emits the diff), but it wants a total owner/mode spec. Ours is
    // optional, so anything left unspecified falls back to the worker's
    // current identity, and to a conventional `0o644` for the mode.
    let (default_user, default_group) = super::current_owner_names();
    let owner = state.user.unwrap_or(default_user);
    let group = state.group.unwrap_or(default_group);
    let mode = state.mode.unwrap_or_else(|| 0o644.into());

    super::path::file_raw::inner(state.dest, body, owner, group, mode)
}