stackpatrol 0.1.0

Single-binary Rust CLI that monitors a server and reports to the StackPatrol control plane.
use std::time::{Duration, SystemTime, UNIX_EPOCH};

use anyhow::{Context, Result, bail};
use stackpatrol_core::event::{Event, EventEnvelope};

use crate::config::Config;

/// Returns the event-ingest URL for the configured API endpoint.
///
/// Any trailing `/` characters on `cfg.api_endpoint` are dropped before the
/// `/v1/events` path is appended, so the result never contains a doubled slash
/// at the join point.
pub fn events_url(cfg: &Config) -> String {
    let base = cfg.api_endpoint.trim_end_matches('/');
    [base, "/v1/events"].concat()
}

/// Constructs the HTTP client used for talking to the control plane.
///
/// Both a connect timeout (5s) and an overall request timeout (10s) are set so
/// that a connection which never completes cannot block a caller indefinitely.
///
/// # Errors
///
/// Returns an error (with the context "building HTTP client") if the underlying
/// `reqwest` builder fails.
pub fn build_client() -> Result<reqwest::Client> {
    let connect_limit = Duration::from_secs(5);
    let request_limit = Duration::from_secs(10);

    let builder = reqwest::Client::builder()
        .connect_timeout(connect_limit)
        .timeout(request_limit);

    builder.build().context("building HTTP client")
}

/// Wraps `event` in an [`EventEnvelope`] stamped with the configured server name
/// and the current wall-clock time in whole seconds since the Unix epoch.
///
/// If the system clock reports a time before the epoch, the timestamp is `0`.
pub fn make_envelope(cfg: &Config, event: Event) -> EventEnvelope {
    let timestamp = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs() as i64,
        // Clock is set before 1970: fall back to the zero timestamp.
        Err(_) => 0,
    };
    let server_name = cfg.server_name.clone();

    EventEnvelope {
        server_name,
        timestamp,
        event,
    }
}

/// Maximum number of POST attempts per envelope (the first try plus retries).
///
/// A backoff sleep happens only *between* attempts, never after the last one, so
/// the worst-case time spent sleeping is BASE_DELAY * (1 + 2) = 3×BASE_DELAY.
/// With BASE_DELAY = 500ms that is 1.5s of waiting before we give up.
///
/// This figure excludes the time spent inside the requests themselves. If the
/// client comes from `build_client` (10s request timeout), three timed-out
/// attempts plus backoff can take roughly 31.5s in total.
/// NOTE(review): compare against the daemon's heartbeat interval — confirm it
/// tolerates that worst case.
const MAX_ATTEMPTS: u32 = 3;
/// Delay before the first retry; doubled for each subsequent retry.
const BASE_DELAY: Duration = Duration::from_millis(500);

/// Outcome of a `try_send_envelope` call. The daemon uses the distinction to decide
/// whether to queue the event in the durable buffer (Transient) or drop it (Permanent).
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so outcomes can be logged,
/// duplicated and compared directly (e.g. in assertions).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SendOutcome {
    /// The request succeeded.
    Ok,
    /// The send failed in a way that may succeed later; carries a human-readable
    /// description of the failure.
    Transient(String),
    /// The send failed in a way that retrying will not fix; carries a
    /// human-readable description of the failure.
    Permanent(String),
}

/// Sends an already-built envelope to the control plane, retrying transient
/// failures with exponential backoff.
///
/// At most `MAX_ATTEMPTS` posts are made. After a transient failure on attempt
/// `k` (with attempts still remaining) the task sleeps `BASE_DELAY * 2^(k-1)`
/// before trying again. A success or a permanent failure is returned
/// immediately; once the attempts are exhausted the last transient failure is
/// returned with the attempt count prefixed to its message.
pub async fn try_send_envelope(
    client: &reqwest::Client,
    url: &str,
    cfg: &Config,
    envelope: &EventEnvelope,
) -> SendOutcome {
    let mut attempt: u32 = 1;
    loop {
        // Anything other than a transient failure ends the loop right away.
        let msg = match try_post(client, url, cfg, envelope).await {
            SendOutcome::Transient(msg) => msg,
            done @ (SendOutcome::Ok | SendOutcome::Permanent(_)) => return done,
        };

        if attempt >= MAX_ATTEMPTS {
            return SendOutcome::Transient(format!("after {attempt} attempts: {msg}"));
        }

        // Backoff doubles with every failed attempt: BASE, 2×BASE, ...
        let delay = BASE_DELAY * 2u32.pow(attempt - 1);
        eprintln!(
            "send: attempt {attempt}/{MAX_ATTEMPTS} failed: {msg} — retrying in {}ms",
            delay.as_millis()
        );
        tokio::time::sleep(delay).await;
        attempt += 1;
    }
}

/// `Result`-returning adapter over `try_send_envelope` for one-shot callers
/// (`alert-test`) that treat every failure the same way.
///
/// # Errors
///
/// Returns an error carrying the failure message whenever the outcome is
/// `Transient` or `Permanent`; the distinction between the two is discarded.
pub async fn send_envelope(
    client: &reqwest::Client,
    url: &str,
    cfg: &Config,
    envelope: &EventEnvelope,
) -> Result<()> {
    let outcome = try_send_envelope(client, url, cfg, envelope).await;
    if let SendOutcome::Transient(msg) | SendOutcome::Permanent(msg) = outcome {
        bail!(msg);
    }
    Ok(())
}

/// Wraps `event` in a freshly timestamped envelope and sends it.
///
/// Intended for one-shot commands, where the envelope (and therefore its
/// timestamp) does not need to outlive this single call.
///
/// # Errors
///
/// Propagates any error returned by `send_envelope`.
pub async fn send_event(
    client: &reqwest::Client,
    url: &str,
    cfg: &Config,
    event: Event,
) -> Result<()> {
    send_envelope(client, url, cfg, &make_envelope(cfg, event)).await
}

/// Performs a single POST of `envelope` to `url` (bearer-authenticated with
/// `cfg.token`) and classifies the result.
///
/// Classification:
/// - success status → `SendOutcome::Ok`
/// - the request could not be built (e.g. malformed URL, body that fails to
///   serialize) → `Permanent`, since an identical retry cannot succeed
/// - any other send failure (connect, timeout, …) → `Transient`
/// - 5xx or 429 → `Transient`
/// - every other status → `Permanent`
///
/// Failure messages for HTTP statuses have the form `HTTP <status>: <body>`, or
/// just `HTTP <status>` when the response body is empty or unreadable.
async fn try_post(
    client: &reqwest::Client,
    url: &str,
    cfg: &Config,
    envelope: &EventEnvelope,
) -> SendOutcome {
    let resp = match client
        .post(url)
        .bearer_auth(&cfg.token)
        .json(envelope)
        .send()
        .await
    {
        Ok(r) => r,
        // Builder errors come from our own input (bad URL, unserializable body);
        // retrying or queueing the exact same request won't help.
        Err(e) if e.is_builder() => {
            return SendOutcome::Permanent(format!("request: {e}"));
        }
        Err(e) => {
            // reqwest doesn't expose a clean is_retryable, but the remaining
            // failures (timeout, connect, ...) are transient from the daemon's POV.
            return SendOutcome::Transient(format!("network: {e}"));
        }
    };

    let status = resp.status();
    if status.is_success() {
        return SendOutcome::Ok;
    }

    // Best effort: an unreadable body is treated as empty rather than masking
    // the status-based classification.
    let body = resp.text().await.unwrap_or_default();
    let body = body.trim();
    let detail = if body.is_empty() {
        format!("HTTP {status}")
    } else {
        format!("HTTP {status}: {body}")
    };

    if status.is_server_error() || status.as_u16() == 429 {
        SendOutcome::Transient(detail)
    } else {
        // 4xx (other than 429): bad token, validation error, plan-limit denial.
        // Retrying won't help.
        SendOutcome::Permanent(detail)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal config pointing at `endpoint`; every other section uses its default.
    fn config_for(endpoint: &'static str) -> Config {
        Config {
            server_name: "x".into(),
            token: "y".into(),
            api_endpoint: endpoint.into(),
            daemon: Default::default(),
            docker: Default::default(),
            systemd: Default::default(),
            resources: Default::default(),
            ports: Default::default(),
        }
    }

    /// The events URL is the same whether or not the endpoint ends in a slash.
    #[test]
    fn events_url_strips_trailing_slash() {
        for endpoint in ["https://api.example.com/", "https://api.example.com"] {
            let cfg = config_for(endpoint);
            assert_eq!(events_url(&cfg), "https://api.example.com/v1/events");
        }
    }

    /// Sums the sleeps taken between attempts and checks they stay under 5s.
    #[test]
    fn backoff_schedule_is_bounded() {
        let mut total = Duration::ZERO;
        for retry in 0..MAX_ATTEMPTS - 1 {
            total += BASE_DELAY * 2u32.pow(retry);
        }
        assert!(total < Duration::from_secs(5), "retry budget too long: {total:?}");
    }
}