rusm_otp/lifecycle.rs
1//! Opt-in process **lifecycle logging** — the "see what's happening" switch a node
2//! turns on explicitly (`rusm.toml [log] level`). Off by default: the spawn hot path
3//! does nothing. When on, the runtime logs each **labeled** process's spawn and exit to
4//! stderr — so the signal is *components* (which the host labels), not internal plumbing
5//! (responders, writers — left unlabeled).
6//!
7//! This module owns only the platform line's *structure*; the shared look (palette,
8//! column widths, timestamp, tty-gated colour) comes from [`rusm_logfmt`], so platform
9//! and app logs line up when interleaved. The runtime owns the *gate* (the level) and the
10//! *when* (a spawn site, and `deregister` on exit). Lines read `<time> rusm <verb>
11//! <label>#<pid> <detail>`, the spawn line carrying the process's effective capabilities.
12
13use std::collections::BTreeMap;
14
15use rusm_logfmt as fmt;
16
17use crate::exit::ExitReason;
18use crate::pid::Pid;
19
/// How much platform lifecycle logging a node emits (`rusm.toml [log] level`).
///
/// The variants are declared from quietest to loudest and the derived ordering
/// follows that declaration order, so levels are cumulative: a configured level
/// shows every event whose own level is at or below it. Each lifecycle event sits
/// on exactly one rung:
///
/// * `Error` — a **crash** (a trap / OOM);
/// * `Warn` — additionally a **kill** (or link cascade);
/// * `Info` — additionally a **clean exit**;
/// * `Debug` — additionally every **spawn**.
///
/// A restart therefore reads as a crash `exit` (Error) followed by a fresh `spawn`
/// (Debug).
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum LogLevel {
    /// Platform logging disabled. This is the default, so the hot path pays nothing.
    #[default]
    Off,
    /// Only crashes (a guest trap / OOM).
    Error,
    /// Crashes, plus kills and link cascades.
    Warn,
    /// All of the above, plus clean (normal) exits — i.e. every process *ending*.
    Info,
    /// All of the above, plus every spawn — the full lifecycle.
    Debug,
}
39
40impl LogLevel {
41 /// Parse a manifest string (`off`/`error`/`warn`/`info`/`debug`); anything else is
42 /// `Off`, so a typo silently quiets rather than crashes.
43 pub fn parse(s: &str) -> Self {
44 match s.trim().to_ascii_lowercase().as_str() {
45 "error" => Self::Error,
46 "warn" | "warning" => Self::Warn,
47 "info" => Self::Info,
48 "debug" | "trace" => Self::Debug,
49 _ => Self::Off,
50 }
51 }
52
53 /// The level of a process **exit** — the single source of truth for both the gate
54 /// (which level shows it) and the colour: a crash is `Error`, a kill/cascade `Warn`,
55 /// a clean exit `Info`.
56 pub fn for_exit(reason: ExitReason) -> Self {
57 match reason {
58 ExitReason::Crashed => Self::Error,
59 ExitReason::Killed | ExitReason::NoProc => Self::Warn,
60 ExitReason::Normal => Self::Info,
61 }
62 }
63
64 /// The shared-palette colour for this level (red crash / yellow kill / green clean;
65 /// cyan otherwise).
66 fn colour(self) -> &'static str {
67 match self {
68 Self::Error => fmt::ERROR,
69 Self::Warn => fmt::WARN,
70 Self::Info => fmt::OK,
71 _ => fmt::LEVEL,
72 }
73 }
74}
75
76/// `<id>` rendered as a bold name + dim `#pid` — the spawned-process **subject** of a
77/// spawn/exit line (distinct from the `who` column the lead already holds).
78fn ident(label: &str, pid: Pid) -> String {
79 format!(
80 "{}{}",
81 fmt::paint(fmt::BOLD, label),
82 fmt::paint(fmt::DIM, &format!("#{}", pid.0))
83 )
84}
85
86/// Log a component **spawn**: `<time> rusm spawn <label>#<pid> <detail>` (detail = its
87/// effective capabilities, so a reader sees exactly what the process can do).
88pub fn log_spawn(pid: Pid, label: &str, detail: &str) {
89 eprintln!(
90 "{}",
91 fmt::platform_line(
92 fmt::LEVEL, // cyan
93 "spawn",
94 &format!("{} {}", ident(label, pid), fmt::paint(fmt::DIM, detail)),
95 )
96 );
97}
98
99/// Log a process **exit**: `<time> rusm exit <label>#<pid> <reason>` — coloured by the
100/// exit's level (red crash / yellow kill / green clean), the same mapping that gated it.
101pub fn log_exit(pid: Pid, label: &str, reason: ExitReason) {
102 let code = LogLevel::for_exit(reason).colour();
103 eprintln!(
104 "{}",
105 fmt::platform_line(
106 code,
107 "exit",
108 &format!(
109 "{} {}",
110 ident(label, pid),
111 fmt::paint(code, &format!("{reason:?}").to_lowercase())
112 ),
113 )
114 );
115}
116
117/// Log a process **census**: `<time> rusm census <comp>=<n> … <tag>=<n> …` — the
118/// count of live processes per component (by label), then per process-group **tag**
119/// (Erlang `pg`) membership. Emitted debounced after process state settles. Bold names,
120/// cyan counts; an idle node reads `(none)`.
121pub fn log_census(components: &BTreeMap<String, u64>, tags: &BTreeMap<String, u64>) {
122 // `<name>=<n>`: component names bold (white), tag names yellow, counts cyan.
123 let entry = |name: &str, n: &u64, name_colour: &str| {
124 format!(
125 "{}{}{}",
126 fmt::paint(name_colour, name),
127 fmt::paint(fmt::DIM, "="),
128 fmt::paint(fmt::LEVEL, &n.to_string())
129 )
130 };
131 let mut entries: Vec<String> = components
132 .iter()
133 .map(|(name, n)| entry(name, n, fmt::BOLD))
134 .collect();
135 entries.extend(tags.iter().map(|(name, n)| entry(name, n, fmt::TAG)));
136 let body = if entries.is_empty() {
137 fmt::paint(fmt::DIM, "(none)")
138 } else {
139 entries.join(" ")
140 };
141 eprintln!("{}", fmt::platform_line(fmt::LEVEL, "census", &body));
142}
143
144/// Log a forced **kill** of one process: `<time> rusm kill #<pid>` (yellow, like the
145/// `exit` it triggers). Emitted when `kill(pid)` actually terminates a live process.
146pub fn log_kill(pid: Pid) {
147 eprintln!(
148 "{}",
149 fmt::platform_line(
150 fmt::WARN,
151 "kill",
152 &fmt::paint(fmt::DIM, &format!("#{}", pid.0)),
153 )
154 );
155}
156
157/// Log a **kill-tag**: terminating a whole process group — `<time> rusm kill <tag> → <n>`
158/// (the tag in yellow, the killed count in cyan). One line per `kill_tag`, regardless of
159/// group size (the members' own `exit` lines follow).
160pub fn log_kill_tag(tag: &str, killed: usize) {
161 let body = format!(
162 "{} {} {}",
163 fmt::paint(fmt::TAG, tag),
164 fmt::paint(fmt::DIM, "→"),
165 fmt::paint(fmt::LEVEL, &killed.to_string()),
166 );
167 eprintln!("{}", fmt::platform_line(fmt::WARN, "kill", &body));
168}
169
170// A supervisor **restart** intentionally has no dedicated event: it reads as the
171// crashed instance's abnormal `exit` line followed by a fresh `spawn` line for the
172// same component — carrying the crash reason and the new pid, which a bare "restart"
173// line could not. (`LogLevel::Info` sits between `Warn` and `Debug` for that reason.)
174
#[cfg(test)]
mod tests {
    use super::LogLevel;
    use crate::exit::ExitReason;

    #[test]
    fn parse_maps_known_levels_and_quiets_the_rest() {
        assert_eq!(LogLevel::parse("debug"), LogLevel::Debug);
        assert_eq!(LogLevel::parse("INFO"), LogLevel::Info);
        assert_eq!(LogLevel::parse("warning"), LogLevel::Warn);
        assert_eq!(LogLevel::parse("error"), LogLevel::Error);
        // Unset or unrecognised quiets to Off — a typo never accidentally goes loud.
        assert_eq!(LogLevel::parse(""), LogLevel::Off);
        assert_eq!(LogLevel::parse("loud"), LogLevel::Off);
    }

    #[test]
    fn parse_accepts_aliases_mixed_case_and_padding() {
        // Both spellings of each aliased level resolve to the same variant.
        assert_eq!(LogLevel::parse("warn"), LogLevel::Warn);
        assert_eq!(LogLevel::parse("trace"), LogLevel::Debug);
        // Matching is ASCII-case-insensitive and trims surrounding whitespace.
        assert_eq!(LogLevel::parse("Warn"), LogLevel::Warn);
        assert_eq!(LogLevel::parse("  debug\n"), LogLevel::Debug);
        assert_eq!(LogLevel::parse("\tERROR "), LogLevel::Error);
        // An explicit `off` lands on the same value as the fallback.
        assert_eq!(LogLevel::parse("off"), LogLevel::Off);
        // Interior whitespace is not trimmed, so this is just an unknown string.
        assert_eq!(LogLevel::parse("de bug"), LogLevel::Off);
    }

    #[test]
    fn levels_are_ordered_off_to_debug() {
        assert!(LogLevel::Off < LogLevel::Error);
        assert!(LogLevel::Error < LogLevel::Warn);
        assert!(LogLevel::Warn < LogLevel::Info);
        assert!(LogLevel::Info < LogLevel::Debug);
    }

    #[test]
    fn default_level_is_off() {
        assert_eq!(LogLevel::default(), LogLevel::Off);
    }

    #[test]
    fn exit_reasons_map_to_their_levels() {
        assert_eq!(LogLevel::for_exit(ExitReason::Crashed), LogLevel::Error);
        assert_eq!(LogLevel::for_exit(ExitReason::Killed), LogLevel::Warn);
        assert_eq!(LogLevel::for_exit(ExitReason::NoProc), LogLevel::Warn);
        assert_eq!(LogLevel::for_exit(ExitReason::Normal), LogLevel::Info);
    }
}
197}