Skip to main content

trusty_common/
log_buffer.rs

1//! Bounded in-memory ring buffer of recent tracing log lines.
2//!
3//! Why: Operators debugging a running daemon want the last N log lines
4//!      without SSHing to the box, tailing a file, or restarting with a
5//!      different `RUST_LOG`. A small in-process ring buffer lets the daemon
6//!      serve recent logs over HTTP (`GET /logs/tail`) at near-zero cost and
7//!      with no file I/O. The cap keeps memory bounded on a long-running
8//!      process.
9//! What: [`LogBuffer`] is a thread-safe `VecDeque<String>` capped at a fixed
10//!      capacity; the oldest line is evicted on overflow. [`LogBufferLayer`]
11//!      is a `tracing_subscriber::Layer` that formats every event into one
12//!      line and pushes it onto the buffer. The HTTP handler drains the tail.
13//! Test: see the `tests` module — capacity eviction, tail semantics, and a
14//!      layer-integration test that emits events through a real subscriber.
15
16use std::collections::VecDeque;
17use std::fmt::Write as _;
18use std::sync::{Arc, Mutex};
19
20use tracing::field::{Field, Visit};
21use tracing_subscriber::Layer;
22use tracing_subscriber::layer::Context;
23
24/// Default ring-buffer capacity (lines). Sized so a daemon retains a few
25/// minutes of INFO-level chatter while costing well under 1 MB of RAM.
26pub const DEFAULT_LOG_CAPACITY: usize = 1000;
27
28/// Thread-safe, bounded ring buffer of formatted log lines.
29///
30/// Why: shared between the tracing `Layer` (writer) and the HTTP handler
31///      (reader); both hold cheap `Arc` clones of the same underlying deque.
32/// What: wraps `Arc<Mutex<VecDeque<String>>>`. `push` appends and evicts the
33///      oldest line past capacity; `tail` snapshots the most recent N lines.
34/// Test: `capacity_evicts_oldest`, `tail_returns_last_n`.
35#[derive(Clone, Debug)]
36pub struct LogBuffer {
37    inner: Arc<Mutex<VecDeque<String>>>,
38    capacity: usize,
39}
40
41impl LogBuffer {
42    /// Create an empty buffer with the given line capacity.
43    ///
44    /// Why: callers (daemon startup) choose the cap; tests use a tiny one.
45    /// What: allocates a `VecDeque` with `capacity.max(1)` reserved slots so a
46    ///      zero capacity is treated as one (a zero-cap ring is useless and
47    ///      would panic on the eviction arithmetic).
48    /// Test: `capacity_evicts_oldest`.
49    #[must_use]
50    pub fn new(capacity: usize) -> Self {
51        let capacity = capacity.max(1);
52        Self {
53            inner: Arc::new(Mutex::new(VecDeque::with_capacity(capacity))),
54            capacity,
55        }
56    }
57
58    /// Append a line, evicting the oldest entry when at capacity.
59    ///
60    /// Why: a tracing `Layer` calls this on every event; it must never panic
61    ///      or block long. A poisoned mutex (a prior panic while logging) is
62    ///      recovered via `into_inner` so logging itself never cascades a
63    ///      panic into the daemon.
64    /// What: pushes `line` to the back; if length now exceeds `capacity`,
65    ///      pops the front.
66    /// Test: `capacity_evicts_oldest`.
67    pub fn push(&self, line: String) {
68        let mut guard = match self.inner.lock() {
69            Ok(g) => g,
70            Err(poisoned) => poisoned.into_inner(),
71        };
72        guard.push_back(line);
73        while guard.len() > self.capacity {
74            guard.pop_front();
75        }
76    }
77
78    /// Snapshot the most recent `n` lines (or all, when `n` exceeds the
79    /// current length).
80    ///
81    /// Why: the `/logs/tail` handler returns these as a JSON array. Cloning
82    ///      under the lock keeps the critical section short and lets the
83    ///      caller serialise without holding the mutex.
84    /// What: returns a `Vec<String>` of at most `n` lines, oldest-first.
85    /// Test: `tail_returns_last_n`, `tail_all_when_n_exceeds_len`.
86    #[must_use]
87    pub fn tail(&self, n: usize) -> Vec<String> {
88        let guard = match self.inner.lock() {
89            Ok(g) => g,
90            Err(poisoned) => poisoned.into_inner(),
91        };
92        let skip = guard.len().saturating_sub(n);
93        guard.iter().skip(skip).cloned().collect()
94    }
95
96    /// Total number of lines currently buffered.
97    ///
98    /// Why: the `/logs/tail` response reports `total` so callers can tell
99    ///      whether the buffer has wrapped.
100    /// What: returns the deque length.
101    /// Test: `tail_returns_last_n` asserts `len` after pushes.
102    #[must_use]
103    pub fn len(&self) -> usize {
104        match self.inner.lock() {
105            Ok(g) => g.len(),
106            Err(poisoned) => poisoned.into_inner().len(),
107        }
108    }
109
110    /// Whether the buffer holds no lines.
111    ///
112    /// Why: clippy requires `is_empty` alongside `len`; also a convenient
113    ///      readiness check in tests.
114    /// What: returns `len() == 0`.
115    /// Test: covered by `capacity_evicts_oldest`.
116    #[must_use]
117    pub fn is_empty(&self) -> bool {
118        self.len() == 0
119    }
120}
121
122/// `tracing_subscriber::Layer` that mirrors every event into a [`LogBuffer`].
123///
124/// Why: wiring this layer into the subscriber means the daemon's normal
125///      `tracing::info!` / `warn!` calls are captured for `/logs/tail` with
126///      no extra call sites — the buffer stays in lock-step with stderr.
127/// What: on each event, formats
128///      `<YYYY-MM-DD HH:MM:SS> [<level> <target>] <message> k=v …` into a
129///      single line and pushes it. The leading local-time timestamp (issue
130///      #846) lets the dashboard log view show when each line was emitted.
131///      Level/target/fields are collected via a lightweight `Visit`
132///      implementation.
133/// Test: `layer_captures_events` installs the layer on a real subscriber and
134///      asserts an emitted event lands in the buffer.
135pub struct LogBufferLayer {
136    buffer: LogBuffer,
137}
138
139impl LogBufferLayer {
140    /// Wrap a [`LogBuffer`] as a tracing layer.
141    ///
142    /// Why: the daemon constructs the buffer first (so it can also hand a
143    ///      clone to its HTTP state) and then builds the layer around it.
144    /// What: stores a clone of the buffer handle.
145    /// Test: `layer_captures_events`.
146    #[must_use]
147    pub fn new(buffer: LogBuffer) -> Self {
148        Self { buffer }
149    }
150}
151
152/// Field visitor that accumulates an event's message and key/value fields
153/// into a single human-readable string.
154///
155/// Why: tracing events expose their data only through the `Visit` callback;
156///      we render it to text once so the buffer stores plain `String`s.
157/// What: the canonical `message` field becomes the line body; every other
158///      field is appended as ` key=value`.
159/// Test: exercised indirectly by `layer_captures_events`.
160struct LineVisitor {
161    message: String,
162    fields: String,
163}
164
165impl LineVisitor {
166    fn new() -> Self {
167        Self {
168            message: String::new(),
169            fields: String::new(),
170        }
171    }
172}
173
174impl Visit for LineVisitor {
175    fn record_debug(&mut self, field: &Field, value: &dyn std::fmt::Debug) {
176        if field.name() == "message" {
177            // `{:?}` on the message preserves it without surrounding quotes
178            // for string payloads in practice; use Display-ish formatting.
179            let _ = write!(self.message, "{value:?}");
180        } else {
181            let _ = write!(self.fields, " {}={value:?}", field.name());
182        }
183    }
184
185    fn record_str(&mut self, field: &Field, value: &str) {
186        if field.name() == "message" {
187            self.message.push_str(value);
188        } else {
189            let _ = write!(self.fields, " {}={value}", field.name());
190        }
191    }
192}
193
194impl<S: tracing::Subscriber> Layer<S> for LogBufferLayer {
195    fn on_event(&self, event: &tracing::Event<'_>, _ctx: Context<'_, S>) {
196        let meta = event.metadata();
197        let mut visitor = LineVisitor::new();
198        event.record(&mut visitor);
199        // Trim the leading `"` artefact that `{:?}` adds for the message when
200        // the payload was a quoted string literal — keep lines readable.
201        let message = visitor.message.trim_matches('"');
202        // Prepend a local-time timestamp (issue #846) so the dashboard log
203        // view shows per-line timing. We use `chrono::Local` directly rather
204        // than `tracing_subscriber::fmt::time::LocalTime`, which is unsound in
205        // multithreaded programs.
206        let ts = chrono::Local::now().format("%Y-%m-%d %H:%M:%S");
207        let line = format!(
208            "{} [{} {}] {}{}",
209            ts,
210            meta.level(),
211            meta.target(),
212            message,
213            visitor.fields
214        );
215        self.buffer.push(line);
216    }
217}
218
219#[cfg(test)]
220mod tests {
221    use super::*;
222
223    #[test]
224    fn capacity_evicts_oldest() {
225        let buf = LogBuffer::new(3);
226        assert!(buf.is_empty());
227        for i in 0..5 {
228            buf.push(format!("line {i}"));
229        }
230        // Capacity 3 → only the last three survive.
231        assert_eq!(buf.len(), 3);
232        assert_eq!(buf.tail(10), vec!["line 2", "line 3", "line 4"]);
233    }
234
235    #[test]
236    fn tail_returns_last_n() {
237        let buf = LogBuffer::new(100);
238        for i in 0..10 {
239            buf.push(format!("l{i}"));
240        }
241        assert_eq!(buf.len(), 10);
242        assert_eq!(buf.tail(3), vec!["l7", "l8", "l9"]);
243    }
244
245    #[test]
246    fn tail_all_when_n_exceeds_len() {
247        let buf = LogBuffer::new(100);
248        buf.push("only".to_string());
249        assert_eq!(buf.tail(50), vec!["only"]);
250        assert_eq!(buf.tail(0), Vec::<String>::new());
251    }
252
253    #[test]
254    fn zero_capacity_treated_as_one() {
255        let buf = LogBuffer::new(0);
256        buf.push("a".to_string());
257        buf.push("b".to_string());
258        assert_eq!(buf.tail(10), vec!["b"]);
259    }
260
261    #[test]
262    fn layer_captures_events() {
263        use tracing_subscriber::layer::SubscriberExt;
264
265        let buffer = LogBuffer::new(10);
266        let subscriber = tracing_subscriber::registry().with(LogBufferLayer::new(buffer.clone()));
267        tracing::subscriber::with_default(subscriber, || {
268            tracing::info!(answer = 42, "hello from test");
269        });
270        let lines = buffer.tail(10);
271        assert_eq!(lines.len(), 1, "expected one captured line, got {lines:?}");
272        let line = &lines[0];
273        assert!(line.contains("hello from test"), "line was: {line}");
274        assert!(line.contains("answer=42"), "line was: {line}");
275        assert!(line.contains("INFO"), "line was: {line}");
276
277        // Issue #846: every line is prefixed with a `YYYY-MM-DD HH:MM:SS `
278        // local-time timestamp. Lock in the shape without over-fitting to a
279        // specific clock value: 4-digit year, then '-', then a space-delimited
280        // time component, with the level appearing after the timestamp.
281        let bytes = line.as_bytes();
282        assert!(
283            bytes.len() >= 19,
284            "line too short to hold a timestamp: {line}"
285        );
286        assert!(
287            bytes[0..4].iter().all(u8::is_ascii_digit),
288            "expected a 4-digit year prefix, line was: {line}"
289        );
290        assert_eq!(
291            bytes[4], b'-',
292            "expected '-' after the year, line was: {line}"
293        );
294        // The timestamp must come before the level/target bracket.
295        let ts_end = line
296            .find(" [")
297            .expect("expected a ' [' after the timestamp");
298        let ts = &line[..ts_end];
299        assert_eq!(
300            ts.len(),
301            19,
302            "timestamp should be exactly 'YYYY-MM-DD HH:MM:SS', got: {ts:?}"
303        );
304    }
305}