Skip to main content

rusm_otp/
lifecycle.rs

1//! Opt-in process **lifecycle logging** — the "see what's happening" switch a node
2//! turns on explicitly (`rusm.toml [log] level`). Off by default: the spawn hot path
3//! does nothing. When on, the runtime logs each **labeled** process's spawn and exit to
4//! stderr — so the signal is *components* (which the host labels), not internal plumbing
5//! (responders, writers — left unlabeled).
6//!
7//! This module owns only the platform line's *structure*; the shared look (palette,
8//! column widths, timestamp, tty-gated colour) comes from [`rusm_logfmt`], so platform
9//! and app logs line up when interleaved. The runtime owns the *gate* (the level) and the
10//! *when* (a spawn site, and `deregister` on exit). Lines read `<time> rusm <verb>
11//! <label>#<pid>  <detail>`, the spawn line carrying the process's effective capabilities.
12
13use std::collections::BTreeMap;
14
15use rusm_logfmt as fmt;
16
17use crate::exit::ExitReason;
18use crate::pid::Pid;
19
/// Verbosity of the platform's lifecycle log, set through `rusm.toml [log] level`.
///
/// Variants are declared from quietest to loudest and the derived ordering follows that
/// declaration order, so the scale is cumulative: a configured level shows every event
/// whose own level is at or below it. Each lifecycle event has exactly one level:
///
/// * `Error` — a **crash** (a trap / OOM);
/// * `Warn` — additionally a **kill** (or link cascade);
/// * `Info` — additionally a **clean exit**;
/// * `Debug` — additionally every **spawn**.
///
/// A restart therefore shows up as a crash `exit` (Error) followed by a fresh `spawn`
/// (Debug).
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum LogLevel {
    /// Platform logging disabled. This is the default, so an unconfigured node pays
    /// nothing on the hot path.
    #[default]
    Off,
    /// Only crashes (a guest trap / OOM).
    Error,
    /// Crashes, plus kills and link cascades.
    Warn,
    /// Everything above, plus clean (normal) exits — i.e. every process *ending*.
    Info,
    /// Everything above, plus every spawn — the full lifecycle.
    Debug,
}
39
40impl LogLevel {
41    /// Parse a manifest string (`off`/`error`/`warn`/`info`/`debug`); anything else is
42    /// `Off`, so a typo silently quiets rather than crashes.
43    pub fn parse(s: &str) -> Self {
44        match s.trim().to_ascii_lowercase().as_str() {
45            "error" => Self::Error,
46            "warn" | "warning" => Self::Warn,
47            "info" => Self::Info,
48            "debug" | "trace" => Self::Debug,
49            _ => Self::Off,
50        }
51    }
52
53    /// The level of a process **exit** — the single source of truth for both the gate
54    /// (which level shows it) and the colour: a crash is `Error`, a kill/cascade `Warn`,
55    /// a clean exit `Info`.
56    pub fn for_exit(reason: ExitReason) -> Self {
57        match reason {
58            ExitReason::Crashed => Self::Error,
59            ExitReason::Killed | ExitReason::NoProc => Self::Warn,
60            ExitReason::Normal => Self::Info,
61        }
62    }
63
64    /// The shared-palette colour for this level (red crash / yellow kill / green clean;
65    /// cyan otherwise).
66    fn colour(self) -> &'static str {
67        match self {
68            Self::Error => fmt::ERROR,
69            Self::Warn => fmt::WARN,
70            Self::Info => fmt::OK,
71            _ => fmt::LEVEL,
72        }
73    }
74}
75
76/// `<id>` rendered as a bold name + dim `#pid` — the spawned-process **subject** of a
77/// spawn/exit line (distinct from the `who` column the lead already holds).
78fn ident(label: &str, pid: Pid) -> String {
79    format!(
80        "{}{}",
81        fmt::paint(fmt::BOLD, label),
82        fmt::paint(fmt::DIM, &format!("#{}", pid.0))
83    )
84}
85
86/// Log a component **spawn**: `<time> rusm spawn <label>#<pid>  <detail>` (detail = its
87/// effective capabilities, so a reader sees exactly what the process can do).
88pub fn log_spawn(pid: Pid, label: &str, detail: &str) {
89    eprintln!(
90        "{}",
91        fmt::platform_line(
92            fmt::LEVEL, // cyan
93            "spawn",
94            &format!("{}  {}", ident(label, pid), fmt::paint(fmt::DIM, detail)),
95        )
96    );
97}
98
99/// Log a process **exit**: `<time> rusm exit  <label>#<pid>  <reason>` — coloured by the
100/// exit's level (red crash / yellow kill / green clean), the same mapping that gated it.
101pub fn log_exit(pid: Pid, label: &str, reason: ExitReason) {
102    let code = LogLevel::for_exit(reason).colour();
103    eprintln!(
104        "{}",
105        fmt::platform_line(
106            code,
107            "exit",
108            &format!(
109                "{}  {}",
110                ident(label, pid),
111                fmt::paint(code, &format!("{reason:?}").to_lowercase())
112            ),
113        )
114    );
115}
116
117/// Log a process **census**: `<time> rusm census  <comp>=<n>  …  <tag>=<n>  …` — the
118/// count of live processes per component (by label), then per process-group **tag**
119/// (Erlang `pg`) membership. Emitted debounced after process state settles. Bold names,
120/// cyan counts; an idle node reads `(none)`.
121pub fn log_census(components: &BTreeMap<String, u64>, tags: &BTreeMap<String, u64>) {
122    // `<name>=<n>`: component names bold (white), tag names yellow, counts cyan.
123    let entry = |name: &str, n: &u64, name_colour: &str| {
124        format!(
125            "{}{}{}",
126            fmt::paint(name_colour, name),
127            fmt::paint(fmt::DIM, "="),
128            fmt::paint(fmt::LEVEL, &n.to_string())
129        )
130    };
131    let mut entries: Vec<String> = components
132        .iter()
133        .map(|(name, n)| entry(name, n, fmt::BOLD))
134        .collect();
135    entries.extend(tags.iter().map(|(name, n)| entry(name, n, fmt::TAG)));
136    let body = if entries.is_empty() {
137        fmt::paint(fmt::DIM, "(none)")
138    } else {
139        entries.join("  ")
140    };
141    eprintln!("{}", fmt::platform_line(fmt::LEVEL, "census", &body));
142}
143
144/// Log a forced **kill** of one process: `<time> rusm kill  #<pid>` (yellow, like the
145/// `exit` it triggers). Emitted when `kill(pid)` actually terminates a live process.
146pub fn log_kill(pid: Pid) {
147    eprintln!(
148        "{}",
149        fmt::platform_line(
150            fmt::WARN,
151            "kill",
152            &fmt::paint(fmt::DIM, &format!("#{}", pid.0)),
153        )
154    );
155}
156
157/// Log a **kill-tag**: terminating a whole process group — `<time> rusm kill  <tag> → <n>`
158/// (the tag in yellow, the killed count in cyan). One line per `kill_tag`, regardless of
159/// group size (the members' own `exit` lines follow).
160pub fn log_kill_tag(tag: &str, killed: usize) {
161    let body = format!(
162        "{} {} {}",
163        fmt::paint(fmt::TAG, tag),
164        fmt::paint(fmt::DIM, "→"),
165        fmt::paint(fmt::LEVEL, &killed.to_string()),
166    );
167    eprintln!("{}", fmt::platform_line(fmt::WARN, "kill", &body));
168}
169
170// A supervisor **restart** intentionally has no dedicated event: it reads as the
171// crashed instance's abnormal `exit` line followed by a fresh `spawn` line for the
172// same component — carrying the crash reason and the new pid, which a bare "restart"
173// line could not. (`LogLevel::Info` sits between `Warn` and `Debug` for that reason.)
174
#[cfg(test)]
mod tests {
    use super::LogLevel;

    /// Known spellings map to their level (case-insensitively); unset or unrecognised
    /// input quiets to `Off`, so a typo never accidentally goes loud.
    #[test]
    fn parse_maps_known_levels_and_quiets_the_rest() {
        let cases = [
            ("debug", LogLevel::Debug),
            ("INFO", LogLevel::Info),
            ("warning", LogLevel::Warn),
            ("error", LogLevel::Error),
            ("", LogLevel::Off),
            ("loud", LogLevel::Off),
        ];
        for &(input, expected) in &cases {
            assert_eq!(LogLevel::parse(input), expected);
        }
    }

    /// Each level is strictly below the next one on the `Off` → `Debug` scale.
    #[test]
    fn levels_are_ordered_off_to_debug() {
        let ascending = [
            LogLevel::Off,
            LogLevel::Error,
            LogLevel::Warn,
            LogLevel::Info,
            LogLevel::Debug,
        ];
        for pair in ascending.windows(2) {
            assert!(pair[0] < pair[1]);
        }
    }
}