Skip to main content

logfmt_zerocopy/
lib.rs

1use std::str::CharIndices;
2
3pub trait Logfmt<'a> {
4    /// Returns an iterator that yields logfmt key-value pairs.
5    ///
6    /// # Example
7    ///
8    /// ```
9    /// use logfmt_zerocopy::Logfmt;
10    ///
11    /// let line = r#"level=info msg="hello world" count=42"#;
12    ///
13    /// for (key, value) in line.logfmt() {
14    ///     println!("{key}: {value}");
15    /// }
16    /// ```
17    fn logfmt(&'a self) -> Iter<'a>;
18}
19
20impl<'a> Logfmt<'a> for str {
21    fn logfmt(&'a self) -> Iter<'a> {
22        Iter {
23            text: self,
24            chars_indices: self.char_indices(),
25            state: State::Init,
26        }
27    }
28}
29
/// A lazy, zero-copy iterator over the key-value pairs of a logfmt line.
///
/// Created by calling [`logfmt()`](Logfmt::logfmt) on a string slice. Every
/// key and value it yields is a sub-slice of that string: nothing is
/// allocated, and escape sequences inside quoted values are returned verbatim
/// (a value written as `"a \"b\""` is yielded as `a \"b\"`).
///
/// Parsing is lenient and never fails:
///
/// * a word that is not followed by `=` is skipped;
/// * `key=` followed by whitespace or by the end of the input yields an
///   empty value;
/// * a quoted value whose closing quote is missing yields everything up to
///   the end of the input.
///
/// # Example
///
/// ```
/// use logfmt_zerocopy::Logfmt;
///
/// let line = r#"level=info msg="hello world""#;
/// let mut iter = line.logfmt();
///
/// assert_eq!(iter.next(), Some(("level", "info")));
/// assert_eq!(iter.next(), Some(("msg", "hello world")));
/// assert_eq!(iter.next(), None);
/// ```
#[derive(Debug, Clone)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct Iter<'a> {
    /// The complete input line; yielded keys and values are sliced out of it.
    text: &'a str,
    /// Cursor over the characters of `text` that have not been scanned yet,
    /// paired with their byte offsets.
    chars_indices: CharIndices<'a>,
    /// Scanner state carried over from the previously consumed character.
    state: State,
}

impl<'a> Iterator for Iter<'a> {
    type Item = (&'a str, &'a str);

    /// Consumes characters until one pair is complete and returns it, or
    /// returns `None` once the input holds no further pair.
    fn next(&mut self) -> Option<Self::Item> {
        let text = self.text;

        for (idx, input) in &mut self.chars_indices {
            match self.state.next(idx, input) {
                State::ValueEnd(ks, ke, vs, ve) => {
                    // `ValueEnd` is never stored: the character that closed
                    // the value has been consumed, so resume between pairs.
                    self.state = State::Init;
                    return Some((&text[ks..ke], &text[vs..ve]));
                }
                next => self.state = next,
            }
        }

        // The input is exhausted. Flush whatever pair was still in progress
        // and reset, so that every later call returns `None`.
        match std::mem::take(&mut self.state) {
            // `=` was the last character: the value is empty.
            State::KeyEnd(ks, ke) => Some((&text[ks..ke], &text[ke + '='.len_utf8()..])),
            // The value runs to the end of the input. This includes a quoted
            // value that was never closed (for instance a truncated line),
            // which is yielded rather than silently discarded.
            State::ValueStart(ks, ke, vs)
            | State::ValueStartWithQuote(ks, ke, vs)
            | State::ValueEscaped(ks, ke, vs) => Some((&text[ks..ke], &text[vs..])),
            // No `=` was seen, so there is no pair to report.
            State::Init | State::KeyStart(_) | State::ValueEnd(..) => None,
        }
    }
}

/// Scanner state of [`Iter`].
///
/// All payloads are byte offsets into the scanned text. In order they are:
/// key start, key end (the offset of `=`), value start, value end.
#[derive(Debug, Default, Clone, Copy)]
enum State {
    /// Between pairs; whitespace is being skipped.
    #[default]
    Init,
    /// Inside a key that began at the given offset.
    KeyStart(usize),
    /// The `=` terminating the key has just been consumed.
    KeyEnd(usize, usize),
    /// Inside an unquoted value.
    ValueStart(usize, usize, usize),
    /// Inside a quoted value; the value start is just past the opening quote.
    ValueStartWithQuote(usize, usize, usize),
    /// Inside a quoted value, right after a backslash.
    ValueEscaped(usize, usize, usize),
    /// A complete pair is available; the value end offset is exclusive.
    ValueEnd(usize, usize, usize, usize),
}

impl State {
    /// Returns the state reached from `self` after consuming the character
    /// `c` located at byte offset `i`.
    fn next(self, i: usize, c: char) -> Self {
        match (self, c) {
            // Between pairs: skip whitespace, anything else opens a key.
            (State::Init, c) if c.is_whitespace() => State::Init,
            (State::Init, _) => State::KeyStart(i),

            // In a key: `=` closes it, whitespace abandons it (no value).
            (State::KeyStart(ks), '=') => State::KeyEnd(ks, i),
            (State::KeyStart(_), c) if c.is_whitespace() => State::Init,
            (State::KeyStart(_), _) => self,

            // Right after `=`: decide which kind of value follows.
            (State::KeyEnd(ks, ke), '"') => {
                // The value starts after the opening quote, not on it.
                State::ValueStartWithQuote(ks, ke, i + '"'.len_utf8())
            }
            (State::KeyEnd(ks, ke), c) if c.is_whitespace() => State::ValueEnd(ks, ke, i, i),
            (State::KeyEnd(ks, ke), _) => State::ValueStart(ks, ke, i),

            // Unquoted value: runs until the next whitespace character.
            (State::ValueStart(ks, ke, vs), c) if c.is_whitespace() => {
                State::ValueEnd(ks, ke, vs, i)
            }
            (State::ValueStart(..), _) => self,

            // Quoted value: runs until an unescaped closing quote.
            (State::ValueStartWithQuote(ks, ke, vs), '\\') => State::ValueEscaped(ks, ke, vs),
            (State::ValueStartWithQuote(ks, ke, vs), '"') => State::ValueEnd(ks, ke, vs, i),
            (State::ValueStartWithQuote(..), _) => self,

            // The character after a backslash is taken literally.
            (State::ValueEscaped(ks, ke, vs), _) => State::ValueStartWithQuote(ks, ke, vs),

            // `Iter` resets before feeding more input, so this arm only
            // exists to keep the match exhaustive.
            (State::ValueEnd(..), _) => State::Init,
        }
    }
}
123
#[cfg(test)]
mod tests {
    use super::*;

    /// Drives the logfmt iterator over `input` to completion and returns
    /// every pair it produced, in order.
    pub fn collect_pairs(input: &str) -> Vec<(&str, &str)> {
        let mut pairs = Vec::new();
        pairs.extend(input.logfmt());
        pairs
    }

    /// Table-driven check: each entry is an input line together with the
    /// exact sequence of pairs the iterator must yield for it.
    #[test]
    fn logfmt_collect_pairs_all_cases() {
        const CASES: &[(&str, &[(&str, &str)])] = &[
            // Nothing but (optional) whitespace: no pairs at all.
            ("", &[]),
            (" ", &[]),
            ("   \t  ", &[]),
            // Exactly one pair.
            ("a=1", &[("a", "1")]),
            ("key=value", &[("key", "value")]),
            // Whitespace before, after and between pairs is ignored.
            ("  a=1", &[("a", "1")]),
            ("a=1  ", &[("a", "1")]),
            ("a=1   b=2", &[("a", "1"), ("b", "2")]),
            ("a=1\tb=2", &[("a", "1"), ("b", "2")]),
            ("a=1 \t  b=2   c=3", &[("a", "1"), ("b", "2"), ("c", "3")]),
            // `key=` with nothing after the `=` gives an empty value.
            ("a=", &[("a", "")]),
            ("a= b=2", &[("a", ""), ("b", "2")]),
            ("a= b=", &[("a", ""), ("b", "")]),
            // Quoting keeps embedded whitespace and strips only the quotes.
            (r#"msg="hello world""#, &[("msg", "hello world")]),
            (
                r#"a=1 msg="hello world" b=2"#,
                &[("a", "1"), ("msg", "hello world"), ("b", "2")],
            ),
            (
                r#"msg="  leading and  internal   spaces  ""#,
                &[("msg", "  leading and  internal   spaces  ")],
            ),
            // An empty pair of quotes.
            (r#"msg="""#, &[("msg", "")]),
            // Escapes inside quotes are returned as written, backslash included.
            (r#"msg="hello \"world\"""#, &[("msg", r#"hello \"world\""#)]),
            (r#"msg="say \"hi\" ok""#, &[("msg", r#"say \"hi\" ok"#)]),
            (
                r#"msg="escaped \\ backslash""#,
                &[("msg", r#"escaped \\ backslash"#)],
            ),
            (r#"a="\"" b=2"#, &[("a", r#"\""#), ("b", "2")]),
            // Unquoted values may contain punctuation, including further `=`.
            ("path=/var/log/syslog", &[("path", "/var/log/syslog")]),
            (
                "url=https://example.com/a?b=c&d=e",
                &[("url", "https://example.com/a?b=c&d=e")],
            ),
            ("ip=127.0.0.1", &[("ip", "127.0.0.1")]),
            // Repeated keys are all reported, in input order.
            ("a=1 a=2 a=3", &[("a", "1"), ("a", "2"), ("a", "3")]),
            // Keys containing `_`, `-` or `.` are accepted as-is.
            ("a_b=1", &[("a_b", "1")]),
            ("a-b=1", &[("a-b", "1")]),
            ("a.b=1", &[("a.b", "1")]),
        ];

        for &(line, want) in CASES {
            let pairs = collect_pairs(line);
            assert_eq!(
                pairs.as_slice(),
                want,
                "mismatch for input: {input:?}\n  got: {got:?}\n  expected: {expected:?}",
                input = line,
                got = pairs,
                expected = want,
            );
        }
    }
}