use crate::error::ParseError;
use crate::span::Span;
use memchr::memchr_iter;
/// How the text of a line is broken into fields by `fields_on_line` and
/// `push_fields_on_line`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SplitMode {
/// Each whitespace-separated token is split at the first occurrence of the
/// dialect's `kv_delim` byte into a key and a value; a token without the
/// delimiter becomes a field with an empty key.
KeyValuePairs,
/// Each whitespace-separated token becomes a field with an empty key and
/// the whole token as its value; `kv_delim` is not consulted.
SpaceTokens,
}
/// Describes how a log line is tokenized into fields.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct LogDialect {
/// Byte that separates a key from its value inside a token; only used when
/// `split_mode` is `SplitMode::KeyValuePairs`.
pub kv_delim: u8,
/// Selects the tokenization strategy applied to each line.
pub split_mode: SplitMode,
}
impl Default for LogDialect {
fn default() -> Self {
Self {
kv_delim: b'=',
split_mode: SplitMode::KeyValuePairs,
}
}
}
/// A borrowed view of one line together with its position in the source.
///
/// When produced by `LineIter`, `text` excludes the terminating `\n` (and a
/// `\r` directly before it) and `byte_offset` is the index of the line's first
/// byte in the iterated buffer.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct LineView<'src> {
/// The line's content, borrowed from the source buffer.
pub text: &'src str,
/// Offset, in bytes, at which `text` starts.
pub byte_offset: usize,
}
impl<'src> LineView<'src> {
    /// Returns the byte range covered by this line.
    ///
    /// The span starts at `byte_offset` and ends `text.len()` bytes later
    /// (exclusive end).
    #[inline]
    pub fn span(self) -> Span {
        let start = self.byte_offset;
        let end = start + self.text.len();
        Span { start, end }
    }
}
/// A single field extracted from a line; both parts borrow from the line text.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct FieldRef<'src> {
/// Text before the key/value delimiter. `fields_on_line` and
/// `push_fields_on_line` leave this empty for tokens that have no delimiter
/// and for every token in `SplitMode::SpaceTokens`.
pub key: &'src str,
/// Text after the delimiter, or the whole token when no key was split off.
pub value: &'src str,
}
/// Iterator over the lines of a byte buffer, yielding a `LineView` or a
/// `ParseError` per line.
pub struct LineIter<'src> {
// The complete input being split into lines.
buf: &'src [u8],
// Optional upper bound on a line's length in bytes (terminator excluded);
// longer lines are reported as errors instead of being yielded.
max_line_bytes: Option<usize>,
// Offset in `buf` where the next line begins; iteration ends once it
// reaches `buf.len()`.
next_start: usize,
}
impl<'src> LineIter<'src> {
pub fn new(buf: &'src [u8], max_line_bytes: Option<usize>) -> Self {
Self {
buf,
max_line_bytes,
next_start: 0,
}
}
}
impl<'src> Iterator for LineIter<'src> {
    type Item = Result<LineView<'src>, ParseError>;

    /// Yields the next line of the buffer, or `None` once every byte has been
    /// consumed.
    ///
    /// A line ends at `\n`; a `\r` directly in front of that `\n` is dropped
    /// as well. The last line does not need a terminator.
    ///
    /// # Errors
    ///
    /// * `ParseError::LineTooLong` when the line content (terminator excluded)
    ///   is longer than the configured `max_line_bytes`.
    /// * `ParseError::InvalidUtf8` when the line is not valid UTF-8; `byte` is
    ///   the absolute offset up to which the line decoded successfully.
    ///
    /// In both cases the offending line is skipped, so the caller can keep
    /// iterating.
    fn next(&mut self) -> Option<Self::Item> {
        let buf = self.buf;
        let line_start = self.next_start;
        if line_start >= buf.len() {
            return None;
        }

        // Locate the end of the content and the offset where the next line begins.
        let (content_end, resume_at) = match memchr_iter(b'\n', &buf[line_start..]).next() {
            Some(offset) => {
                let newline_at = line_start + offset;
                let ends_with_cr = newline_at > line_start && buf[newline_at - 1] == b'\r';
                let content_end = if ends_with_cr {
                    newline_at - 1
                } else {
                    newline_at
                };
                (content_end, newline_at + 1)
            }
            None => (buf.len(), buf.len()),
        };

        // Every outcome below consumes this line, so advance the cursor once here.
        self.next_start = resume_at;

        let len = content_end.saturating_sub(line_start);
        if let Some(max) = self.max_line_bytes.filter(|&max| len > max) {
            return Some(Err(ParseError::LineTooLong {
                line_start,
                len,
                max,
            }));
        }

        let parsed = std::str::from_utf8(&buf[line_start..content_end])
            .map(|text| LineView {
                text,
                byte_offset: line_start,
            })
            .map_err(|e| ParseError::InvalidUtf8 {
                byte: line_start + e.valid_up_to(),
            });
        Some(parsed)
    }
}
pub fn fields_on_line<'src>(line: LineView<'src>, dialect: LogDialect) -> Vec<FieldRef<'src>> {
let mut out = Vec::new();
match dialect.split_mode {
SplitMode::KeyValuePairs => {
for token in split_ascii_whitespace(line.text) {
if let Some((k, v)) = split_first_byte(token, dialect.kv_delim) {
out.push(FieldRef { key: k, value: v });
} else {
out.push(FieldRef {
key: "",
value: token,
});
}
}
}
SplitMode::SpaceTokens => {
for token in split_ascii_whitespace(line.text) {
out.push(FieldRef {
key: "",
value: token,
});
}
}
}
out
}
/// Splits `line` into fields according to `dialect` and appends them to `out`.
///
/// Existing elements of `out` are left untouched, which lets callers reuse one
/// vector across many lines. The line text is tokenized on ASCII whitespace:
///
/// * `SplitMode::KeyValuePairs` — each token is split at the first
///   `dialect.kv_delim` byte; a token without the delimiter gets an empty key.
/// * `SplitMode::SpaceTokens` — each token becomes a field with an empty key.
pub fn push_fields_on_line<'src>(
    line: LineView<'src>,
    dialect: LogDialect,
    out: &mut Vec<FieldRef<'src>>,
) {
    let tokens = split_ascii_whitespace(line.text);
    match dialect.split_mode {
        SplitMode::KeyValuePairs => out.extend(tokens.map(|token| {
            // A token without a delimiter is kept whole as the value.
            let (key, value) = split_first_byte(token, dialect.kv_delim).unwrap_or(("", token));
            FieldRef { key, value }
        })),
        SplitMode::SpaceTokens => out.extend(tokens.map(|token| FieldRef {
            key: "",
            value: token,
        })),
    }
}
/// Splits `s` at the first occurrence of the byte `delim`, returning the text
/// before and after it; the delimiter itself is not part of either half.
///
/// Returns `None` when `delim` does not occur in `s` or when `delim` is not an
/// ASCII byte. In valid UTF-8 a non-ASCII byte only ever appears as part of a
/// multi-byte sequence, so it cannot be removed from a `&str` on character
/// boundaries, and slicing at such a byte would panic.
fn split_first_byte<'a>(s: &'a str, delim: u8) -> Option<(&'a str, &'a str)> {
    if !delim.is_ascii() {
        return None;
    }
    // An ASCII byte always encodes exactly one `char`, so searching for the
    // char finds the same position as searching for the byte.
    s.split_once(char::from(delim))
}
/// Returns the non-empty tokens of `s` that are separated by runs of ASCII
/// whitespace (space, tab, line feed, form feed, carriage return).
///
/// Leading and trailing whitespace produce no tokens. Non-ASCII whitespace
/// such as U+00A0 is not a separator and stays inside the token.
fn split_ascii_whitespace(s: &str) -> impl Iterator<Item = &str> {
    // The standard library splits on exactly the same whitespace set.
    s.split_ascii_whitespace()
}
/// Returns a `LineIter` over the lines of `src`.
///
/// Convenience wrapper that passes the bytes of `src` and `max_line_bytes`
/// unchanged to `LineIter::new`.
pub fn lines_in_str<'src>(src: &'src str, max_line_bytes: Option<usize>) -> LineIter<'src> {
LineIter::new(src.as_bytes(), max_line_bytes)
}