Skip to main content

microsandbox_utils/
log_text.rs

1//! Text helpers for parsing/rendering captured log lines.
2//!
3//! Shared by the SDK's `microsandbox::sandbox::logs` reader and the
4//! CLI's `msb logs` renderer — both consume the same on-disk JSON
5//! Lines format and need the same low-level transforms.
6
7//--------------------------------------------------------------------------------------------------
8// Functions
9//--------------------------------------------------------------------------------------------------
10
/// Strip ANSI escape sequences (CSI, OSC, nF, two-byte escapes).
///
/// Hand-rolled state machine. Avoids pulling the `regex` crate just
/// for one fixed pattern. Handles:
///
/// - `\x1b[…<final>` — CSI (SGR colors, cursor moves). Final byte is
///   in `0x40..=0x7e`.
/// - `\x1b]…\x07` and `\x1b]…\x1b\\` — OSC (terminated by BEL or ST).
/// - `\x1b<I…><F>` with one or more intermediates `I` in `0x20..=0x2f`
///   and a final `F` in `0x30..=0x7e` — nF sequences such as the
///   charset designation `\x1b(B`.
/// - `\x1b<X>` for X in `0x30..=0x7e` — two-byte escapes, including the
///   C1 controls in `0x40..=0x5f`.
///
/// An ESC that is followed by anything else (a C0 control such as a
/// newline, another ESC, non-ASCII text) is dropped on its own; the
/// following character is kept and processed normally. An ESC at the
/// very end of the input is dropped. Unterminated CSI/OSC sequences
/// swallow the remainder of the input.
pub fn strip_ansi(input: &str) -> String {
    let mut out = String::with_capacity(input.len());
    let mut chars = input.chars().peekable();
    while let Some(c) = chars.next() {
        if c != '\x1b' {
            out.push(c);
            continue;
        }
        // Peek instead of consuming: if what follows is not part of an
        // escape sequence it must survive (and may itself be an ESC that
        // starts a real sequence on the next iteration).
        match chars.peek().copied() {
            Some('[') => {
                chars.next();
                // Skip parameter/intermediate bytes up to and including
                // the final byte.
                for c in chars.by_ref() {
                    if matches!(c, '\x40'..='\x7e') {
                        break;
                    }
                }
            }
            Some(']') => {
                chars.next();
                while let Some(c) = chars.next() {
                    if c == '\x07' {
                        break;
                    }
                    // ST is the two-character sequence ESC `\`.
                    if c == '\x1b' && chars.peek() == Some(&'\\') {
                        chars.next();
                        break;
                    }
                }
            }
            Some('\x20'..='\x2f') => {
                // nF sequence: every intermediate byte, then at most one
                // final byte. A missing/invalid final is left in place.
                while chars
                    .next_if(|&c| matches!(c, '\x20'..='\x2f'))
                    .is_some()
                {}
                chars.next_if(|&c| matches!(c, '\x30'..='\x7e'));
            }
            Some('\x30'..='\x7e') => {
                // Two-byte escape (`ESC 7`, `ESC M`, `ESC c`, …).
                chars.next();
            }
            // Not an escape sequence: drop only the stray ESC.
            Some(_) => {}
            None => break,
        }
    }
    out
}
53
/// Peel a leading RFC 3339 timestamp off a `runtime.log`/`kernel.log`
/// line.
///
/// The line is cut at its first whitespace character. The part before
/// the cut is accepted as a timestamp when it is at least 20 bytes long
/// and ends in `Z`; the part after the cut (with that single whitespace
/// character removed, any further whitespace kept) is returned as the
/// remainder.
///
/// Returns `None` when the line contains no whitespace at all or when
/// the leading token does not look like a timestamp.
pub fn split_leading_timestamp(line: &str) -> Option<(&str, &str)> {
    line.split_once(char::is_whitespace)
        .filter(|(token, _)| token.ends_with('Z') && token.len() >= 20)
}
66
/// Decode a standard-alphabet (`A–Z a–z 0–9 + /`) base64 string.
///
/// Used for the opt-in raw-mode `e: "b64"` log entries; small enough
/// that pulling in the `base64` crate isn't justified.
///
/// Leading and trailing whitespace is trimmed first. The remaining
/// input must be a whole number of 4-character groups. Padding is
/// accepted only as one or two trailing `=` in the final group.
///
/// Returns `None` on malformed input: a length that is not a multiple
/// of four, a character outside the alphabet (including embedded
/// whitespace), or `=` anywhere other than the tail of the last group.
/// An empty (or all-whitespace) string decodes to an empty vector.
///
/// Unused low bits in a padded final group are not required to be zero.
pub fn base64_decode(s: &str) -> Option<Vec<u8>> {
    // Map one alphabet character to its 6-bit value.
    fn sextet(b: u8) -> Option<u32> {
        match b {
            b'A'..=b'Z' => Some(u32::from(b - b'A')),
            b'a'..=b'z' => Some(u32::from(b - b'a') + 26),
            b'0'..=b'9' => Some(u32::from(b - b'0') + 52),
            b'+' => Some(62),
            b'/' => Some(63),
            _ => None,
        }
    }

    let bytes = s.trim().as_bytes();
    let groups = bytes.chunks_exact(4);
    if !groups.remainder().is_empty() {
        return None;
    }
    let group_count = groups.len();

    let mut out = Vec::with_capacity(group_count * 3);
    for (idx, group) in groups.enumerate() {
        // Only the last group may be padded, and only in its final one
        // or two positions. Any other `=` is left in the data slice and
        // rejected by `sextet` below.
        let pad = if idx + 1 == group_count {
            group
                .iter()
                .rev()
                .take(2)
                .take_while(|&&b| b == b'=')
                .count()
        } else {
            0
        };

        let data = &group[..4 - pad];
        let n = data
            .iter()
            .try_fold(0u32, |acc, &b| Some((acc << 6) | sextet(b)?))?;
        // Left-align the decoded bits as if the padding were zero sextets.
        let n = n << (6 * pad);

        out.push(((n >> 16) & 0xff) as u8);
        if pad < 2 {
            out.push(((n >> 8) & 0xff) as u8);
        }
        if pad == 0 {
            out.push((n & 0xff) as u8);
        }
    }
    Some(out)
}
107
108//--------------------------------------------------------------------------------------------------
109// Tests
110//--------------------------------------------------------------------------------------------------
111
#[cfg(test)]
mod tests {
    use super::{base64_decode, split_leading_timestamp, strip_ansi};

    /// SGR color codes, erase-display and cursor-home are all CSI
    /// sequences and must disappear, leaving only the visible text.
    #[test]
    fn strip_ansi_removes_color_and_cursor() {
        let cleaned = strip_ansi("\x1b[31merror\x1b[0m\x1b[2J\x1b[H text");
        assert_eq!(cleaned, "error text");
    }

    /// Input without any ESC byte passes through untouched, newlines
    /// included.
    #[test]
    fn strip_ansi_preserves_plain_text() {
        let plain = "hello\nworld\n";
        assert_eq!(strip_ansi(plain), plain);
    }

    /// The first whitespace-delimited token is returned as the
    /// timestamp; the remainder still carries the log level.
    #[test]
    fn split_leading_timestamp_picks_first_token() {
        let parts = split_leading_timestamp("2026-04-30T20:32:59.690Z  INFO some message");
        let (stamp, tail) = parts.unwrap();
        assert_eq!(stamp, "2026-04-30T20:32:59.690Z");
        assert!(tail.trim_start().starts_with("INFO"));
    }

    /// A kernel-style `[ seconds]` prefix is not a timestamp token.
    #[test]
    fn split_leading_timestamp_returns_none_for_unstructured() {
        assert!(split_leading_timestamp("[ 0.123] kernel boot message").is_none());
    }

    /// One padded group round-trips, and the empty string decodes to
    /// no bytes.
    #[test]
    fn base64_decode_basic() {
        let hello = base64_decode("aGVsbG8=").unwrap();
        assert_eq!(hello, b"hello");

        let nothing = base64_decode("").unwrap();
        assert_eq!(nothing, Vec::<u8>::new());
    }
}
147}