Skip to main content

rsigma_runtime/parse/
logfmt.rs

1//! Zero-dependency logfmt parser.
2//!
3//! Parses the [logfmt](https://brandur.org/logfmt) key=value format used by
4//! Heroku, Go's `logrus`, and many other structured logging libraries.
5//!
6//! Supported syntax:
7//!
8//! | Input               | Key      | Value          |
9//! |---------------------|----------|----------------|
10//! | `key=value`         | `key`    | `"value"`      |
11//! | `key="quoted val"`  | `key`    | `"quoted val"` |
12//! | `key=`              | `key`    | `""`           |
13//! | `key`               | `key`    | `"true"`       |
14//! | `key="esc\"ape"`    | `key`    | `esc"ape`      |
15//! | `key="back\\slash"` | `key`    | `back\slash`   |
16//!
17//! # Example
18//!
19//! ```
20//! use rsigma_runtime::parse::logfmt::parse;
21//!
22//! let pairs = parse(r#"level=info msg="request handled" duration=12ms"#);
23//! assert_eq!(pairs.len(), 3);
24//! assert_eq!(pairs[0], ("level".into(), "info".into()));
25//! assert_eq!(pairs[1], ("msg".into(), "request handled".into()));
26//! assert_eq!(pairs[2], ("duration".into(), "12ms".into()));
27//! ```
28
29/// Parse a logfmt line into key-value pairs.
30///
31/// Bare keys (no `=`) are mapped to the value `"true"`.
32/// Empty values (`key=`) are mapped to `""`.
33/// Returns an empty vec for blank input.
34pub fn parse(input: &str) -> Vec<(String, String)> {
35    let mut pairs = Vec::new();
36    let bytes = input.as_bytes();
37    let len = bytes.len();
38    let mut pos = 0;
39
40    while pos < len {
41        // Skip whitespace between pairs.
42        if bytes[pos] == b' ' || bytes[pos] == b'\t' {
43            pos += 1;
44            continue;
45        }
46
47        // Parse key: everything up to '=' or whitespace.
48        let key_start = pos;
49        while pos < len && bytes[pos] != b'=' && bytes[pos] != b' ' && bytes[pos] != b'\t' {
50            pos += 1;
51        }
52
53        // Empty key (e.g. leading `=`) — skip to next whitespace.
54        if pos == key_start {
55            while pos < len && bytes[pos] != b' ' && bytes[pos] != b'\t' {
56                pos += 1;
57            }
58            continue;
59        }
60
61        let key = input[key_start..pos].to_string();
62
63        // Bare key (no `=` follows): value is "true".
64        if pos >= len || bytes[pos] != b'=' {
65            pairs.push((key, "true".to_string()));
66            continue;
67        }
68
69        // Skip the '='.
70        pos += 1;
71
72        // Parse value.
73        let value = if pos < len && bytes[pos] == b'"' {
74            // Quoted value: consume until closing unescaped `"`.
75            pos += 1; // skip opening quote
76            parse_quoted(bytes, &mut pos)
77        } else {
78            // Unquoted value: consume until whitespace or end.
79            let val_start = pos;
80            while pos < len && bytes[pos] != b' ' && bytes[pos] != b'\t' {
81                pos += 1;
82            }
83            input[val_start..pos].to_string()
84        };
85
86        pairs.push((key, value));
87    }
88
89    pairs
90}
91
/// Decode a quoted value, starting at the byte just after the opening `"`.
///
/// `bytes` is the whole input line and `*pos` is the index of the first byte
/// inside the quotes. On return `*pos` points just past the closing quote, or
/// equals `bytes.len()` if the quote was never closed (in which case
/// everything read so far is returned).
///
/// Escapes: `\"` becomes `"` and `\\` becomes `\`. Any other backslash
/// (including one that is the last byte of the input) is kept literally and
/// the byte after it is processed normally.
///
/// Bytes are collected raw and decoded as UTF-8 once at the end, so
/// multi-byte characters inside the quotes are preserved. Only ASCII bytes are
/// ever dropped or used as delimiters, so a value taken from a valid `&str`
/// always decodes cleanly; for arbitrary byte slices, invalid sequences are
/// replaced with U+FFFD.
fn parse_quoted(bytes: &[u8], pos: &mut usize) -> String {
    let mut buf: Vec<u8> = Vec::new();

    while let Some(&b) = bytes.get(*pos) {
        match b {
            b'\\' => match bytes.get(*pos + 1).copied() {
                // Recognised escape: emit the escaped byte, skip both.
                Some(next @ (b'"' | b'\\')) => {
                    buf.push(next);
                    *pos += 2;
                }
                // Unknown escape or trailing backslash: preserve the
                // backslash literally and let the next byte be handled
                // on the following iteration.
                _ => {
                    buf.push(b'\\');
                    *pos += 1;
                }
            },
            b'"' => {
                *pos += 1; // step past the closing quote
                break;
            }
            _ => {
                // Push the raw byte; converting it to `char` here would
                // split multi-byte UTF-8 sequences into Latin-1 code points.
                buf.push(b);
                *pos += 1;
            }
        }
    }

    // Fast path reuses the buffer without copying; the lossy fallback only
    // triggers when the caller supplied bytes that were not valid UTF-8.
    match String::from_utf8(buf) {
        Ok(text) => text,
        Err(err) => String::from_utf8_lossy(err.as_bytes()).into_owned(),
    }
}
131
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an owned pair from borrowed literals.
    fn kv(key: &str, value: &str) -> (String, String) {
        (key.to_string(), value.to_string())
    }

    /// Assert that `input` parses to exactly `expected`, in order.
    fn check(input: &str, expected: &[(&str, &str)]) {
        let want: Vec<(String, String)> = expected.iter().map(|&(k, v)| kv(k, v)).collect();
        assert_eq!(parse(input), want, "input: {:?}", input);
    }

    #[test]
    fn basic_pairs() {
        check("level=info msg=hello", &[("level", "info"), ("msg", "hello")]);
    }

    #[test]
    fn quoted_value() {
        check(r#"msg="hello world""#, &[("msg", "hello world")]);
    }

    #[test]
    fn escaped_quote_in_value() {
        check(r#"msg="say \"hi\"""#, &[("msg", r#"say "hi""#)]);
    }

    #[test]
    fn escaped_backslash_in_value() {
        check(r#"path="C:\\Users\\admin""#, &[("path", r"C:\Users\admin")]);
    }

    #[test]
    fn unknown_escape_preserved() {
        // `\n` is not a recognised escape, so the backslash stays in the value.
        check(r#"msg="hello\nworld""#, &[("msg", r"hello\nworld")]);
    }

    #[test]
    fn bare_key() {
        check("debug level=info", &[("debug", "true"), ("level", "info")]);
    }

    #[test]
    fn empty_value() {
        check("key=", &[("key", "")]);
    }

    #[test]
    fn empty_quoted_value() {
        check(r#"key="""#, &[("key", "")]);
    }

    #[test]
    fn multiple_spaces_between_pairs() {
        check("a=1   b=2", &[("a", "1"), ("b", "2")]);
    }

    #[test]
    fn tabs_as_separators() {
        check("a=1\tb=2", &[("a", "1"), ("b", "2")]);
    }

    #[test]
    fn leading_and_trailing_whitespace() {
        check("  a=1 b=2  ", &[("a", "1"), ("b", "2")]);
    }

    #[test]
    fn empty_input() {
        check("", &[]);
    }

    #[test]
    fn whitespace_only() {
        check("   ", &[]);
    }

    #[test]
    fn unterminated_quote() {
        // The closing quote is missing; the value runs to end of input.
        check(r#"msg="hello world"#, &[("msg", "hello world")]);
    }

    #[test]
    fn leading_equals_skipped() {
        check("=garbage a=1", &[("a", "1")]);
    }

    #[test]
    fn mixed_types() {
        check(
            r#"ts=2024-01-15T10:30:00Z level=error msg="disk full" retry=3 fatal"#,
            &[
                ("ts", "2024-01-15T10:30:00Z"),
                ("level", "error"),
                ("msg", "disk full"),
                ("retry", "3"),
                ("fatal", "true"),
            ],
        );
    }

    #[test]
    fn real_world_heroku_log() {
        let parsed = parse(
            r#"at=info method=GET path="/" host=example.com request_id=abc-123 fwd="10.0.0.1" dyno=web.1 connect=1ms service=4ms status=200 bytes=1234"#,
        );
        assert_eq!(parsed.len(), 11);

        // Spot-check a selection of positions rather than the whole line.
        let samples: [(usize, &str, &str); 5] = [
            (0, "at", "info"),
            (2, "path", "/"),
            (4, "request_id", "abc-123"),
            (5, "fwd", "10.0.0.1"),
            (10, "bytes", "1234"),
        ];
        for &(idx, key, value) in samples.iter() {
            assert_eq!(parsed[idx], kv(key, value));
        }
    }

    #[test]
    fn real_world_go_logrus() {
        let parsed = parse(
            r#"time="2024-01-15T10:30:00Z" level=warning msg="connection reset" component=db"#,
        );
        assert_eq!(parsed.len(), 4);

        let leading: [(&str, &str); 3] = [
            ("time", "2024-01-15T10:30:00Z"),
            ("level", "warning"),
            ("msg", "connection reset"),
        ];
        for (idx, &(key, value)) in leading.iter().enumerate() {
            assert_eq!(parsed[idx], kv(key, value));
        }
    }

    #[test]
    fn consecutive_bare_keys() {
        check(
            "verbose debug trace",
            &[("verbose", "true"), ("debug", "true"), ("trace", "true")],
        );
    }

    #[test]
    fn value_with_equals_sign() {
        // Only the first `=` splits key from value; later ones belong to the
        // unquoted value, which extends greedily to the next whitespace.
        check("expr=a=b", &[("expr", "a=b")]);
    }

    #[test]
    fn quoted_value_with_spaces_and_equals() {
        check(
            r#"filter="status=200 method=GET""#,
            &[("filter", "status=200 method=GET")],
        );
    }
}