bee-check 0.6.0

Retrievability checker for Ethereum Swarm references. Multi-vantage stewardship probes, per-chunk drill-down, and one-shot re-seed.
Documentation
//! `bee-check` — retrievability checker for Swarm references.
//!
//! Multi-vantage probe of `GET /stewardship/{ref}` across one or more
//! Bee nodes, with optional per-chunk drill-down and re-seed.

use anyhow::{Context, Result};
use clap::{Parser, ValueEnum};

use bee_check::{
    DEFAULT_GATEWAY, OutputFormat, ParsedInput, ReseedRequest, annotate_target_overlay,
    check_gateways, check_multi_vantage, check_stamp, cold_download_all, drill_down,
    merge_gateways, parse_input, render_report, render_stamp_status, reseed, resolve_feed,
};

// Command-line interface, declared with clap's derive API.
//
// NOTE for maintainers: the `///` comments on the fields below are not just
// documentation — clap renders them as the `--help` text, so rewording one
// changes user-visible output. Maintainer-only notes in this struct are
// therefore written as plain `//` comments.
#[derive(Parser, Debug)]
#[command(
    name = "bee-check",
    version,
    about = "Retrievability checker for Swarm references",
    long_about = "Probe one or more Bee nodes to determine whether a Swarm \
                  reference is retrievable from the network. Supports \
                  per-chunk drill-down and one-shot re-seed via stewardship."
)]
struct Cli {
    // Positional argument; shown as INPUT in usage but stored as `reference`.
    /// Swarm reference (64- or 128-hex) or feed reference
    /// `feed:OWNER:TOPIC` (40-hex owner, 64-hex topic). Feed inputs are
    /// resolved via `GET /feeds/{owner}/{topic}` on the first --bee
    /// before probing.
    #[arg(value_name = "INPUT")]
    reference: String,

    // Left empty when the flag is absent; `main` applies the defaults.
    /// Bee API URL(s) to probe. Repeat for multi-vantage. Defaults to
    /// $BEE_API_URL or http://localhost:1633.
    #[arg(short = 'b', long = "bee", value_name = "URL")]
    bee: Vec<String>,

    // clap rejects `--gateway` together with `--no-gateway`.
    /// Public gateway URL(s) to HEAD-probe via `{gw}/bzz/{ref}/`.
    /// Repeat for multiple. Default: api.gateway.ethswarm.org unless
    /// --no-gateway is set.
    #[arg(long = "gateway", value_name = "URL", conflicts_with = "no_gateway")]
    gateway: Vec<String>,

    /// Skip public-gateway probing entirely.
    #[arg(long)]
    no_gateway: bool,

    /// Auto-discover vantages from a bee-routing service. Given its
    /// base URL, bee-check queries `GET {url}/routing/v1/closest/{ref}`
    /// and adds the returned (dialable) seed URLs as vantages, ordered
    /// closest-first to the reference. Combined with any explicit
    /// --bee. If --auto-discover is the only source and it fails,
    /// bee-check exits rather than silently probing nothing.
    #[arg(long, value_name = "ROUTING_URL")]
    auto_discover: Option<String>,

    /// Compute and display the proximity order between each vantage
    /// and this target overlay (hex). Vantages are re-sorted closest-
    /// first. Useful for "from a node near neighborhood X, is this
    /// retrievable?" questions.
    #[arg(long, value_name = "HEX")]
    target_overlay: Option<String>,

    /// Walk the manifest and probe each chunk per vantage.
    #[arg(long)]
    per_chunk: bool,

    /// Cold end-to-end download probe. For each --bee, issues
    /// `GET /bytes/{ref}` and streams the body to EOF. For each
    /// --gateway, issues `GET /bzz/{ref}/`. Records bytes_downloaded
    /// + elapsed_ms. Complements the stewardship probe (which only
    /// walks chunks) by exercising the HTTP body transport.
    #[arg(long)]
    cold: bool,

    // "--reseed needs --stamp" is NOT enforced by clap; `main` checks it
    // before running any probe.
    /// After probing, re-upload the reference via PUT /stewardship/{ref}.
    /// Requires --stamp.
    #[arg(long)]
    reseed: bool,

    // The opposite direction ("--stamp needs --reseed") is enforced by clap
    // through `requires` below.
    /// Postage batch ID for re-seed.
    #[arg(long, value_name = "ID", requires = "reseed")]
    stamp: Option<String>,

    // Converted to a `Duration` in `main` and passed to every network helper.
    /// Per-call timeout in seconds.
    #[arg(long, default_value_t = 60)]
    timeout: u64,

    // Only consulted when `--per-chunk` is set.
    /// Max concurrent chunk probes during drill-down.
    #[arg(long, default_value_t = 8)]
    concurrency: usize,

    /// Output format.
    #[arg(long, value_enum, default_value_t = OutputKind::Text)]
    output: OutputKind,

    // Counted flag: the number of occurrences is handed to `init_tracing`.
    /// Increase log verbosity on stderr. Repeat for more: `-v` info,
    /// `-vv` debug, `-vvv` trace. The default level is `warn` so
    /// stderr stays quiet unless something is wrong. Overridden by
    /// `RUST_LOG` if set.
    #[arg(short = 'v', long, action = clap::ArgAction::Count)]
    verbose: u8,
}

// CLI-side selector for `--output`, parsed by clap via `ValueEnum`.
// It is converted into the library's `OutputFormat` before rendering.
// NOTE(review): kept as plain `//` comments on purpose — clap may surface
// `///` text attached to the variants in `--help`; confirm before switching
// these to doc comments.
#[derive(Copy, Clone, Debug, ValueEnum)]
enum OutputKind {
    // Human-readable report.
    Text,
    // Machine-readable report.
    Json,
}

impl From<OutputKind> for OutputFormat {
    /// Translate the CLI's `--output` selector into the library's
    /// rendering format; each variant maps onto its namesake.
    fn from(kind: OutputKind) -> Self {
        match kind {
            OutputKind::Json => Self::Json,
            OutputKind::Text => Self::Text,
        }
    }
}

/// Vantage list used when no `--bee` flag was given.
///
/// Returns exactly one URL: the value of `$BEE_API_URL` when it is set to
/// something non-blank, otherwise `http://localhost:1633`.
///
/// A variable that is set but empty or whitespace-only (e.g. a stray
/// `export BEE_API_URL=`) is treated as unset, so the tool falls back to
/// localhost instead of probing the empty URL. Surrounding whitespace on a
/// real value is trimmed for the same reason. A value that is not valid
/// Unicode also falls back to localhost.
fn default_bees() -> Vec<String> {
    let from_env = std::env::var("BEE_API_URL")
        .ok()
        .map(|raw| raw.trim().to_owned())
        .filter(|url| !url.is_empty());

    vec![from_env.unwrap_or_else(|| "http://localhost:1633".to_owned())]
}

/// Compose the `closest` lookup endpoint of a bee-routing service.
///
/// Any run of trailing `/` characters on `routing_url` is dropped before
/// the fixed `/routing/v1/closest/` path and `reference` are appended.
/// `reference` is inserted as given.
fn closest_url(routing_url: &str, reference: &str) -> String {
    let base = routing_url.trim_end_matches('/');
    [base, "/routing/v1/closest/", reference].concat()
}

/// Minimal shape of a bee-routing `/routing/v1/closest/{addr}`
/// response — we only need the dialable seed URLs.
///
/// Only the `seeds` key is read; any other top-level keys in the
/// response are not represented here.
#[derive(serde::Deserialize)]
struct ClosestResponse {
    /// Seed entries in the order the service returned them. A response
    /// without a `seeds` key deserializes to an empty list rather than
    /// failing, because of `#[serde(default)]`.
    #[serde(default)]
    seeds: Vec<ClosestSeed>,
}

/// One entry of the `seeds` array in a closest-lookup response.
///
/// Only `url` is captured; other per-seed keys are not represented here.
#[derive(serde::Deserialize)]
struct ClosestSeed {
    /// URL of the seed, used verbatim as a vantage to probe. The field has
    /// no default, so it must be present in every entry.
    url: String,
}

/// Query a bee-routing service for the seeds closest to `reference`
/// and return their Bee API URLs (already proximity-ordered by the
/// service). Used by `--auto-discover`.
async fn discover_vantages(
    routing_url: &str,
    reference: &str,
    timeout: std::time::Duration,
) -> Result<Vec<String>> {
    let url = closest_url(routing_url, reference);
    let client = reqwest::Client::builder().timeout(timeout).build()?;
    let resp = client
        .get(&url)
        .send()
        .await
        .with_context(|| format!("GET {url}"))?;
    let status = resp.status();
    if !status.is_success() {
        anyhow::bail!("routing service {url} returned HTTP {status}");
    }
    let parsed: ClosestResponse = resp
        .json()
        .await
        .with_context(|| format!("parse routing response from {url}"))?;
    Ok(parsed.seeds.into_iter().map(|s| s.url).collect())
}

/// Install the global `tracing` subscriber, writing compact lines to stderr.
///
/// `verbosity` is the number of `-v` flags and selects the fallback filter:
/// 0 → `warn`, 1 → info, 2 → debug, 3 or more → trace (the latter three
/// scoped to the `bee_check` and `bee` targets). The fallback is used only
/// when no filter can be built from the environment.
fn init_tracing(verbosity: u8) {
    use tracing_subscriber::EnvFilter;

    // Indexed by verbosity; anything past the last entry clamps to trace.
    const FALLBACK_FILTERS: [&str; 4] = [
        "warn",
        "bee_check=info,bee=info",
        "bee_check=debug,bee=debug",
        "bee_check=trace,bee=trace",
    ];
    let level = usize::from(verbosity).min(FALLBACK_FILTERS.len() - 1);

    let filter = match EnvFilter::try_from_default_env() {
        Ok(from_env) => from_env,
        Err(_) => EnvFilter::new(FALLBACK_FILTERS[level]),
    };

    let builder = tracing_subscriber::fmt()
        .with_env_filter(filter)
        .with_writer(std::io::stderr)
        .with_target(false)
        .compact();
    builder.init();
}

/// Entry point: parse the CLI, probe the reference, print the report,
/// optionally re-seed, and set the exit status.
///
/// Steps, in order:
/// 1. Parse arguments and initialise logging.
/// 2. Reject `--reseed` without `--stamp` before any network call.
/// 3. Pick the initial vantage list: `--bee` if given, otherwise the
///    default vantage, otherwise empty when `--auto-discover` is set.
/// 4. Resolve a `feed:` input to a reference through the first vantage.
/// 5. Append auto-discovered vantages, skipping exact-string duplicates.
/// 6. Probe vantages and gateways concurrently, then apply the optional
///    per-chunk drill-down, target-overlay annotation and cold download.
/// 7. Print the report to stdout; progress and diagnostics go to stderr.
/// 8. With `--reseed`, pre-flight the stamp and re-seed via the first
///    vantage.
///
/// # Errors
/// Returns an `anyhow` error with context when a step fails. The one
/// exception is an auto-discover failure while explicit vantages exist,
/// which is only reported on stderr.
///
/// # Exit status
/// Calls `std::process::exit(2)` when neither a vantage nor a gateway
/// reported `retrievable == Some(true)`. This happens after the report has
/// been printed and after any re-seed.
#[tokio::main]
async fn main() -> Result<()> {
    let cli = Cli::parse();
    init_tracing(cli.verbose);

    // Validate flag combinations BEFORE running any probe so a typo
    // like `--reseed` without `--stamp` fails fast instead of wasting
    // a multi-vantage probe.
    if cli.reseed && cli.stamp.is_none() {
        anyhow::bail!("--reseed requires --stamp <batch-id>");
    }

    // When --auto-discover is the sole vantage source, don't seed the
    // default localhost — the routing service supplies the vantages.
    let mut bees: Vec<String> = if cli.bee.is_empty() {
        if cli.auto_discover.is_some() {
            Vec::new()
        } else {
            default_bees()
        }
    } else {
        cli.bee.clone()
    };

    // One timeout value is shared by every network helper called below.
    let timeout = std::time::Duration::from_secs(cli.timeout);

    // Resolve the input. A `feed:OWNER:TOPIC` is turned into a bare
    // reference via the first vantage; everything else is taken as a
    // reference verbatim.
    let (reference, resolution) = match parse_input(&cli.reference) {
        // Normalize to lowercase so the report's `reference` matches
        // chunk addresses (which `Reference::to_hex` always emits as
        // lowercase) and so two reports for the same content compare
        // equal regardless of how the user typed the hex.
        ParsedInput::Reference(r) => (r.to_lowercase(), None),
        ParsedInput::Feed { owner, topic } => {
            // `bees` is still empty here when --auto-discover is the only
            // vantage source, so a feed input needs an explicit --bee.
            let first_bee = bees
                .first()
                .context("feed resolution requires at least one --bee URL")?;
            let (r, res) = resolve_feed(first_bee, &owner, &topic, timeout)
                .await
                .context("feed resolution failed")?;
            eprintln!("resolved feed -> {r}");
            (r, Some(res))
        }
    };

    // Auto-discover vantages from a bee-routing service, using the
    // RESOLVED reference (so a feed input discovers nodes near the
    // current target). The routing service only ever learns "who's
    // close to address X?" — never that we're probing it.
    if let Some(routing) = cli.auto_discover.as_deref() {
        match discover_vantages(routing, &reference, timeout).await {
            Ok(found) => {
                // Explicit vantages keep their position; discovered ones are
                // appended unless the exact same string is already listed.
                let before = bees.len();
                for url in found {
                    if !bees.contains(&url) {
                        bees.push(url);
                    }
                }
                eprintln!(
                    "auto-discover: added {} vantage(s) from {routing}",
                    bees.len() - before
                );
            }
            // Discovery was the only source of vantages: nothing to probe.
            Err(e) if bees.is_empty() => {
                return Err(e)
                    .context("auto-discover failed and no --bee vantages were supplied");
            }
            // Best-effort: explicit vantages exist, so just report and go on.
            Err(e) => {
                eprintln!("auto-discover failed ({e:#}); continuing with supplied vantages");
            }
        }
    }
    // Also covers a successful discovery that returned no seeds.
    if bees.is_empty() {
        anyhow::bail!(
            "no vantages to probe — supply --bee or a reachable --auto-discover service"
        );
    }

    // Gateway list: none with --no-gateway, the built-in default when no
    // --gateway was given, otherwise exactly what the user listed.
    let gateways: Vec<String> = if cli.no_gateway {
        Vec::new()
    } else if cli.gateway.is_empty() {
        vec![DEFAULT_GATEWAY.to_string()]
    } else {
        cli.gateway.clone()
    };

    // Run the vantage and gateway probes concurrently; their results are
    // inspected only after both have completed.
    let (vantage_report, gateway_results) = tokio::join!(
        check_multi_vantage(&reference, &bees, timeout),
        check_gateways(&reference, &gateways, timeout),
    );
    let mut report = vantage_report.context("multi-vantage check failed")?;
    let gateways_out = gateway_results.context("gateway probe failed")?;
    report = merge_gateways(report, gateways_out);
    // `None` for a plain reference, `Some(..)` when the input was a feed.
    report.resolution = resolution;

    // Optional per-chunk drill-down (--per-chunk).
    let report = if cli.per_chunk {
        drill_down(report, &bees, timeout, cli.concurrency)
            .await
            .context("per-chunk drill-down failed")?
    } else {
        report
    };

    // Optional proximity annotation (--target-overlay).
    let report = match cli.target_overlay.as_deref() {
        Some(target) => annotate_target_overlay(report, target),
        None => report,
    };

    // Rebind mutably so the cold-download results can be attached.
    let mut report = report;
    if cli.cold {
        let cold = cold_download_all(&bees, &gateways, &reference, timeout)
            .await
            .context("cold-download probe failed")?;
        report.cold_downloads = cold;
    }

    // The report is the only thing written to stdout; it is printed as
    // rendered, with no newline added here.
    print!("{}", render_report(&report, cli.output.into()));

    if cli.reseed {
        // The missing-stamp case was already rejected at the top of `main`;
        // this lookup keeps the same error message without an `unwrap`.
        let stamp = cli
            .stamp
            .as_ref()
            .context("--reseed requires --stamp <batch-id>")?;
        // Re-seed always goes through the first vantage in `bees`.
        let target_bee = bees.first().context("no bee URL for --reseed")?;
        // Pre-flight: refuse to upload with a stamp that is missing or
        // not usable.
        let status = check_stamp(target_bee, stamp, timeout)
            .await
            .context("stamp pre-flight failed")?;
        eprint!("{}", render_stamp_status(&status));
        if !status.exists || !status.usable {
            anyhow::bail!("refusing to re-seed: stamp is not usable; see warnings above");
        }
        let req = ReseedRequest {
            reference: reference.clone(),
            bee_url: target_bee.clone(),
            batch_id: stamp.clone(),
            timeout,
        };
        reseed(req).await.context("re-seed failed")?;
        eprintln!("re-seeded {} via {}", reference, target_bee);
    }

    // Exit status 2 means "no vantage and no gateway could retrieve the
    // reference". It is based on the probe results gathered before the
    // re-seed, so a successful re-seed does not change it.
    let any_retrievable = report.vantages.iter().any(|v| v.retrievable == Some(true))
        || report.gateways.iter().any(|g| g.retrievable == Some(true));
    if !any_retrievable {
        std::process::exit(2);
    }
    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The lookup URL is the same whether or not the base ends in `/`.
    #[test]
    fn closest_url_trims_and_joins() {
        let expected = "http://routing:8080/routing/v1/closest/abcd";

        let without_slash = closest_url("http://routing:8080", "abcd");
        let with_slash = closest_url("http://routing:8080/", "abcd");

        assert_eq!(without_slash, expected);
        // trailing slash on the base is trimmed
        assert_eq!(with_slash, expected);
    }

    /// Seed URLs are extracted in response order; extra keys are ignored.
    #[test]
    fn closest_response_parses_seed_urls() {
        let json = r#"{
            "address": "abcd",
            "seeds": [
                {"url": "http://bee-1:1633", "overlay": "aa", "proximity_order": 20},
                {"url": "http://bee-2:1633", "overlay": "ab", "proximity_order": 12}
            ],
            "peers": [{"overlay": "ac"}]
        }"#;

        let response = serde_json::from_str::<ClosestResponse>(json).unwrap();
        let mut urls: Vec<String> = Vec::new();
        for seed in response.seeds {
            urls.push(seed.url);
        }

        assert_eq!(urls, vec!["http://bee-1:1633", "http://bee-2:1633"]);
    }
}