logfmt_zerocopy/
lib.rs

1use std::str::CharIndices;
2
3pub trait Logfmt<'a> {
4    /// Returns an iterator that yields logfmt key-value pairs.
5    ///
6    /// # Example
7    ///
8    /// ```
9    /// use logfmt_zerocopy::Logfmt;
10    ///
11    /// let line = r#"level=info msg="hello world" count=42"#;
12    ///
13    /// for (key, value) in line.logfmt() {
14    ///     println!("{key}: {value}");
15    /// }
16    /// ```
17    fn logfmt(&'a self) -> Iter<'a>;
18}
19
20impl<'a> Logfmt<'a> for str {
21    fn logfmt(&'a self) -> Iter<'a> {
22        Iter {
23            text: self,
24            chars_indices: self.char_indices(),
25            state: State::Init,
26        }
27    }
28}
29
30/// Iterates over logfmt key-value pairs in a string.
31///
32/// Created by calling [`logfmt()`](Logfmt::logfmt) on a string slice.
33///
34/// # Example
35///
36/// ```
37/// use logfmt_zerocopy::Logfmt;
38///
39/// let line = r#"level=info msg="hello world""#;
40/// let mut iter = line.logfmt();
41///
42/// assert_eq!(iter.next(), Some(("level", "info")));
43/// assert_eq!(iter.next(), Some(("msg", "hello world")));
44/// assert_eq!(iter.next(), None);
45/// ```
46pub struct Iter<'a> {
47    text: &'a str,
48    chars_indices: CharIndices<'a>,
49    state: State,
50}
51
52impl<'a> Iterator for Iter<'a> {
53    type Item = (&'a str, &'a str);
54
55    fn next(&mut self) -> Option<Self::Item> {
56        for (idx, input) in &mut self.chars_indices {
57            let next = self.state.next(idx, input);
58            if let State::ValueEnd(ks, ke, vs, ve) = next {
59                self.state = State::Init;
60                return Some((&self.text[ks..ke], &self.text[vs..ve]));
61            } else {
62                self.state = next;
63            }
64        }
65        match self.state {
66            State::KeyEnd(ks, ke) => {
67                self.state = State::Init;
68                let vs = ke + '='.len_utf8();
69                Some((&self.text[ks..ke], &self.text[vs..]))
70            }
71            State::ValueStart(ks, ke, vs) => {
72                self.state = State::Init;
73                Some((&self.text[ks..ke], &self.text[vs..]))
74            }
75            _ => None,
76        }
77    }
78}
79
/// Parser state for the logfmt scanner.
///
/// All `usize` payloads are byte offsets into the text being parsed. They are
/// carried forward from variant to variant so that the finished key and value
/// can be sliced out of the input without copying.
#[derive(Clone, Copy, Debug, Default)]
enum State {
    /// Between pairs; no key has been started yet.
    #[default]
    Init,
    /// Inside a key. Payload: offset of the key's first character.
    KeyStart(usize),
    /// The `=` after a key was just read. Payload: key start, offset of `=`.
    KeyEnd(usize, usize),
    /// Inside an unquoted value. Payload: key start, offset of `=`, value start.
    ValueStart(usize, usize, usize),
    /// Inside a quoted value. Payload: key start, offset of `=`, and the
    /// offset just past the opening quote.
    ValueStartWithQuote(usize, usize, usize),
    /// A pair is complete. Payload: key start, offset of `=` (exclusive key
    /// end), value start, and exclusive value end.
    ValueEnd(usize, usize, usize, usize),
}
90
91impl State {
92    fn next(self, i: usize, c: char) -> Self {
93        match self {
94            State::Init => match c {
95                _ if c.is_whitespace() => State::Init,
96                _ => State::KeyStart(i),
97            },
98            State::KeyStart(ks) => match c {
99                '=' => State::KeyEnd(ks, i),
100                _ if c.is_whitespace() => State::Init,
101                _ => State::KeyStart(ks),
102            },
103            State::KeyEnd(ks, ke) => match c {
104                '"' => State::ValueStartWithQuote(ks, ke, i + c.len_utf8()),
105                _ if c.is_whitespace() => State::ValueEnd(ks, ke, i, i),
106                _ => State::ValueStart(ks, ke, i),
107            },
108            State::ValueStart(ks, ke, vs) => match c {
109                _ if c.is_whitespace() => State::ValueEnd(ks, ke, vs, i),
110                _ => State::ValueStart(ks, ke, vs),
111            },
112            State::ValueStartWithQuote(ks, ke, vs) => match c {
113                '"' => State::ValueEnd(ks, ke, vs, i),
114                _ => State::ValueStartWithQuote(ks, ke, vs),
115            },
116            State::ValueEnd(_, _, _, _) => State::Init,
117        }
118    }
119}
120
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input` and gathers every yielded pair into a `Vec`.
    pub fn collect_pairs(input: &str) -> Vec<(&str, &str)> {
        input.logfmt().collect::<Vec<_>>()
    }

    /// Asserts that parsing `input` produces exactly `expected`, in order.
    fn check(input: &str, expected: &[(&str, &str)]) {
        let got = collect_pairs(input);
        assert_eq!(
            got.as_slice(),
            expected,
            "mismatch for input: {input:?}\n  got: {got:?}\n  expected: {expected:?}"
        );
    }

    #[test]
    fn logfmt_collect_pairs_all_cases() {
        // Empty and whitespace-only input yields nothing.
        check("", &[]);
        check(" ", &[]);
        check("   \t  ", &[]);

        // A single pair.
        check("a=1", &[("a", "1")]);
        check("key=value", &[("key", "value")]);

        // Leading, trailing and repeated separators are ignored.
        check("  a=1", &[("a", "1")]);
        check("a=1  ", &[("a", "1")]);
        check("a=1   b=2", &[("a", "1"), ("b", "2")]);
        check("a=1\tb=2", &[("a", "1"), ("b", "2")]);
        check("a=1 \t  b=2   c=3", &[("a", "1"), ("b", "2"), ("c", "3")]);

        // Empty values, both mid-line and at the end of input.
        check("a=", &[("a", "")]);
        check("a= b=2", &[("a", ""), ("b", "2")]);
        check("a= b=", &[("a", ""), ("b", "")]);

        // Quoted values keep their inner whitespace and lose the quotes.
        check(r#"msg="hello world""#, &[("msg", "hello world")]);
        check(
            r#"a=1 msg="hello world" b=2"#,
            &[("a", "1"), ("msg", "hello world"), ("b", "2")],
        );
        check(
            r#"msg="  leading and  internal   spaces  ""#,
            &[("msg", "  leading and  internal   spaces  ")],
        );

        // A pair of quotes with nothing between them is an empty value.
        check(r#"msg="""#, &[("msg", "")]);

        // Punctuation and URL-like content inside unquoted values.
        check("path=/var/log/syslog", &[("path", "/var/log/syslog")]);
        check(
            "url=https://example.com/a?b=c&d=e",
            &[("url", "https://example.com/a?b=c&d=e")],
        );
        check("ip=127.0.0.1", &[("ip", "127.0.0.1")]);

        // Duplicate keys are all yielded, in input order.
        check("a=1 a=2 a=3", &[("a", "1"), ("a", "2"), ("a", "3")]);

        // Keys containing punctuation are accepted as-is.
        check("a_b=1", &[("a_b", "1")]);
        check("a-b=1", &[("a-b", "1")]);
        check("a.b=1", &[("a.b", "1")]);
    }
}