// stackpatrol 0.1.0
//
// Single-binary Rust CLI that monitors a server and reports to the StackPatrol control plane.
use std::collections::HashMap;

use anyhow::{Context, Result, bail};
use stackpatrol_core::event::Event;
use tokio::process::Command;

/// Probe that tracks whether a configured set of systemd units is up and reports
/// state changes between consecutive ticks.
#[derive(Debug)]
pub struct SystemdProbe {
    /// Unit names handed to `systemctl is-active` on every tick.
    units: Vec<String>,
    /// unit_name -> was_up_last_tick
    last_state: HashMap<String, bool>,
    /// `false` until the first successful scan has recorded a baseline in
    /// `last_state`; no events are emitted before that.
    initialized: bool,
}

impl SystemdProbe {
    /// Build a probe for `units`. No baseline exists until the first successful tick.
    pub fn new(units: Vec<String>) -> Self {
        let last_state = HashMap::new();
        Self {
            units,
            last_state,
            initialized: false,
        }
    }

    /// The probe is enabled only when at least one unit is configured.
    pub fn enabled(&self) -> bool {
        !self.units.is_empty()
    }

    /// Check every configured unit via `systemctl is-active` and report the units
    /// whose up/down state changed since the previous tick.
    ///
    /// The first successful tick only records a baseline and emits nothing — the
    /// same no-startup-storm rule as the Docker probe. A failed scan is logged to
    /// stderr and yields no events, leaving the stored state untouched.
    pub async fn tick(&mut self) -> Vec<Event> {
        if self.units.is_empty() {
            return Vec::new();
        }

        let current: HashMap<String, bool> = match scan(&self.units).await {
            Ok(states) => states
                .into_iter()
                .map(|(unit, state)| (unit, is_up(&state)))
                .collect(),
            Err(err) => {
                eprintln!("systemd probe: scan failed: {err:#}");
                return Vec::new();
            }
        };

        let events: Vec<Event> = if self.initialized {
            let previous = &self.last_state;
            current
                .iter()
                .filter_map(|(unit, &up_now)| {
                    match (previous.get(unit).copied(), up_now) {
                        // Same state as last tick: nothing to report.
                        (Some(up_before), _) if up_before == up_now => None,
                        // Unit without a recorded state that is up: nothing to report.
                        (None, true) => None,
                        (_, true) => Some(Event::ServiceUp { name: unit.clone() }),
                        // Covers both "was up, now down" and "never seen, now down".
                        (_, false) => Some(Event::ServiceDown { name: unit.clone() }),
                    }
                })
                .collect()
        } else {
            // First successful scan: just remember what we saw.
            self.initialized = true;
            Vec::new()
        };

        self.last_state = current;
        events
    }
}

/// Run `systemctl is-active -- <unit1> <unit2> ...` and pair each requested unit
/// with the state string systemd printed for it.
///
/// systemd prints one line per unit in the same order, regardless of overall exit
/// status — non-zero just means at least one unit isn't active, which is fine for
/// our purposes, so the exit status is deliberately not inspected.
///
/// # Errors
///
/// Fails if `systemctl` cannot be run, if its stdout is not valid UTF-8, or if the
/// number of reported states differs from the number of requested units (the
/// captured stderr is included in that error message).
async fn scan(units: &[String]) -> Result<Vec<(String, String)>> {
    let output = Command::new("systemctl")
        .arg("is-active")
        // `--` ends option parsing, so unit names that begin with a dash (such as
        // the root mount `-.mount`) are not mistaken for command-line flags.
        .arg("--")
        .args(units)
        .output()
        .await
        .context("running `systemctl is-active`")?;

    let stdout = String::from_utf8(output.stdout)
        .context("systemctl is-active stdout was not valid utf-8")?;
    let states: Vec<String> = stdout.lines().map(|l| l.trim().to_string()).collect();

    // Pairing below is positional, so a count mismatch would mislabel units.
    if states.len() != units.len() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        bail!(
            "systemctl returned {} state(s) for {} unit(s); stderr: {}",
            states.len(),
            units.len(),
            stderr.trim()
        );
    }

    Ok(units.iter().cloned().zip(states).collect())
}

/// Decide whether a state string reported by `systemctl is-active` counts as "up".
///
/// Only exact matches of the listed states are up; anything else is down.
fn is_up(state: &str) -> bool {
    // Transitional states (`activating` / `reloading`) are deliberately treated as
    // up: a quick restart should not raise a service_down alert that resolves
    // itself a few seconds later.
    const UP_STATES: [&str; 3] = ["active", "activating", "reloading"];
    UP_STATES.contains(&state)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Only `active`, `activating` and `reloading` count as up.
    #[test]
    fn classifies_states() {
        for state in ["active", "activating", "reloading"] {
            assert!(is_up(state));
        }
        for state in ["inactive", "failed", "deactivating", "dead", "unknown"] {
            assert!(!is_up(state));
        }
    }

    /// A probe without units reports itself disabled and never emits events.
    #[tokio::test]
    async fn disabled_when_no_units() {
        let mut probe = SystemdProbe::new(Vec::new());
        assert!(!probe.enabled());
        let events = probe.tick().await;
        assert!(events.is_empty());
    }

    #[tokio::test]
    async fn first_tick_seeds_baseline() {
        // With no units configured `tick()` returns early, so this only checks
        // that the very first tick emits nothing.
        let mut probe = SystemdProbe::new(Vec::new());
        assert!(probe.tick().await.is_empty());
    }

    #[test]
    fn diff_emits_only_transitions() {
        let unit_names = ["nginx.service", "redis.service"];
        let mut probe = SystemdProbe::new(unit_names.iter().map(|u| u.to_string()).collect());
        probe.initialized = true;
        // Baseline: both units were up on the previous tick.
        probe.last_state = unit_names.iter().map(|u| (u.to_string(), true)).collect();

        let current: HashMap<String, bool> = HashMap::from([
            ("nginx.service".to_string(), false), // went down
            ("redis.service".to_string(), true),  // unchanged
        ]);

        // Mirror of the transition rules used by `tick()`: the public entry point
        // shells out to systemctl, which we can't run on macOS in CI.
        let events: Vec<Event> = current
            .iter()
            .filter_map(|(unit, &up_now)| {
                match (probe.last_state.get(unit).copied(), up_now) {
                    (Some(up_before), _) if up_before == up_now => None,
                    (None, true) => None,
                    (_, true) => Some(Event::ServiceUp { name: unit.clone() }),
                    (_, false) => Some(Event::ServiceDown { name: unit.clone() }),
                }
            })
            .collect();

        assert!(matches!(
            events.as_slice(),
            [Event::ServiceDown { name }] if name == "nginx.service"
        ));
    }
}