use std::str::CharIndices;
pub trait Logfmt<'a> {
fn logfmt(&'a self) -> Iter<'a>;
}
impl<'a> Logfmt<'a> for str {
fn logfmt(&'a self) -> Iter<'a> {
Iter {
text: self,
chars_indices: self.char_indices(),
state: State::Init,
}
}
}
pub struct Iter<'a> {
text: &'a str,
chars_indices: CharIndices<'a>,
state: State,
}
impl<'a> Iterator for Iter<'a> {
type Item = (&'a str, &'a str);
fn next(&mut self) -> Option<Self::Item> {
for (idx, input) in &mut self.chars_indices {
let next = self.state.next(idx, input);
if let State::ValueEnd(ks, ke, vs, ve) = next {
self.state = State::Init;
return Some((&self.text[ks..ke], &self.text[vs..ve]));
} else {
self.state = next;
}
}
match self.state {
State::KeyEnd(ks, ke) => {
self.state = State::Init;
let vs = ke + '='.len_utf8();
Some((&self.text[ks..ke], &self.text[vs..]))
}
State::ValueStart(ks, ke, vs) => {
self.state = State::Init;
Some((&self.text[ks..ke], &self.text[vs..]))
}
_ => None,
}
}
}
#[derive(Debug, Default, Clone, Copy)]
enum State {
#[default]
Init,
KeyStart(usize),
KeyEnd(usize, usize),
ValueStart(usize, usize, usize),
ValueStartWithQuote(usize, usize, usize),
ValueEscaped(usize, usize, usize),
ValueEnd(usize, usize, usize, usize),
}
impl State {
fn next(self, i: usize, c: char) -> Self {
match self {
State::Init => match c {
_ if c.is_whitespace() => State::Init,
_ => State::KeyStart(i),
},
State::KeyStart(ks) => match c {
'=' => State::KeyEnd(ks, i),
_ if c.is_whitespace() => State::Init,
_ => State::KeyStart(ks),
},
State::KeyEnd(ks, ke) => match c {
'"' => State::ValueStartWithQuote(ks, ke, i + c.len_utf8()),
_ if c.is_whitespace() => State::ValueEnd(ks, ke, i, i),
_ => State::ValueStart(ks, ke, i),
},
State::ValueStart(ks, ke, vs) => match c {
_ if c.is_whitespace() => State::ValueEnd(ks, ke, vs, i),
_ => State::ValueStart(ks, ke, vs),
},
State::ValueStartWithQuote(ks, ke, vs) => match c {
'\\' => State::ValueEscaped(ks, ke, vs),
'"' => State::ValueEnd(ks, ke, vs, i),
_ => State::ValueStartWithQuote(ks, ke, vs),
},
State::ValueEscaped(ks, ke, vs) => State::ValueStartWithQuote(ks, ke, vs),
State::ValueEnd(_, _, _, _) => State::Init,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
pub fn collect_pairs(input: &str) -> Vec<(&str, &str)> {
input.logfmt().collect()
}
#[test]
fn logfmt_collect_pairs_all_cases() {
let cases: &[(&str, &[(&str, &str)])] = &[
("", &[]),
(" ", &[]),
(" \t ", &[]),
("a=1", &[("a", "1")]),
("key=value", &[("key", "value")]),
(" a=1", &[("a", "1")]),
("a=1 ", &[("a", "1")]),
("a=1 b=2", &[("a", "1"), ("b", "2")]),
("a=1\tb=2", &[("a", "1"), ("b", "2")]),
("a=1 \t b=2 c=3", &[("a", "1"), ("b", "2"), ("c", "3")]),
("a=", &[("a", "")]),
("a= b=2", &[("a", ""), ("b", "2")]),
("a= b=", &[("a", ""), ("b", "")]),
(r#"msg="hello world""#, &[("msg", "hello world")]),
(
r#"a=1 msg="hello world" b=2"#,
&[("a", "1"), ("msg", "hello world"), ("b", "2")],
),
(
r#"msg=" leading and internal spaces ""#,
&[("msg", " leading and internal spaces ")],
),
(r#"msg="""#, &[("msg", "")]),
(r#"msg="hello \"world\"""#, &[("msg", r#"hello \"world\""#)]),
(r#"msg="say \"hi\" ok""#, &[("msg", r#"say \"hi\" ok"#)]),
(
r#"msg="escaped \\ backslash""#,
&[("msg", r#"escaped \\ backslash"#)],
),
(r#"a="\"" b=2"#, &[("a", r#"\""#), ("b", "2")]),
("path=/var/log/syslog", &[("path", "/var/log/syslog")]),
(
"url=https://example.com/a?b=c&d=e",
&[("url", "https://example.com/a?b=c&d=e")],
),
("ip=127.0.0.1", &[("ip", "127.0.0.1")]),
("a=1 a=2 a=3", &[("a", "1"), ("a", "2"), ("a", "3")]),
("a_b=1", &[("a_b", "1")]),
("a-b=1", &[("a-b", "1")]),
("a.b=1", &[("a.b", "1")]),
];
for (input, expected) in cases {
let got = collect_pairs(input);
assert_eq!(
got.as_slice(),
*expected,
"mismatch for input: {input:?}\n got: {got:?}\n expected: {expected:?}"
);
}
}
}