pub use crate::errors::ParseError;
use nom::bytes::complete::{tag, take_till, take_till1};
use nom::error::ErrorKind;
use nom::multi::many1;
use nom::sequence::{delimited, terminated};
use nom::{IResult, Parser};
use std::borrow::Cow;
use std::fmt::Debug;
use std::iter::Iterator;
use std::str::from_utf8;
/// A single `name=value` entry of a section, kept as raw bytes.
#[derive(PartialEq, Eq)]
pub struct AttrBytes<'a> {
/// Key as written in the input, including any `[param]` suffix.
pub name: &'a [u8],
/// Value bytes; either borrowed from the input or an owned buffer.
pub value: Cow<'a, [u8]>,
/// Base name and parameter split out of `name`, when one could be parsed.
pub param: Option<ParamBytes<'a>>,
}
/// The two halves of a parameterised key such as `Name[en]`, as raw bytes.
#[derive(PartialEq, Eq)]
pub struct ParamBytes<'a> {
/// Bytes found after the `[` of the key (e.g. `en`).
pub param: &'a [u8],
/// Bytes found before the `[` of the key (e.g. `Name`).
pub attr_name: &'a [u8],
}
/// A `[title]` header together with the entries that follow it, as raw bytes.
#[derive(PartialEq, Eq)]
pub struct SectionBytes<'a> {
/// Bytes between the `[` and `]` of the header.
pub title: &'a [u8],
/// Entries of the section, in input order.
pub attrs: Vec<AttrBytes<'a>>,
}
impl<'a> Debug for AttrBytes<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let name = match from_utf8(self.name) {
Ok(s) => s.to_owned(),
Err(_) => format!("{:?}", self.name),
};
let value = match from_utf8(&self.value) {
Ok(s) => s.to_owned(),
Err(_) => format!("{:?}", self.value),
};
f.debug_struct("AttrBytes")
.field("name", &name)
.field("value", &value)
.field("param", &self.param)
.finish()
}
}
impl<'a> Debug for SectionBytes<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let title = match from_utf8(self.title) {
Ok(s) => s.to_owned(),
Err(_) => format!("{:?}", self.title),
};
f.debug_struct("SectionBytes")
.field("title", &title)
.field("attrs", &self.attrs)
.finish()
}
}
impl<'a> Debug for ParamBytes<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let attr_name = match from_utf8(self.attr_name) {
Ok(s) => s.to_owned(),
Err(_) => format!("{:?}", self.attr_name),
};
let param = match from_utf8(self.param) {
Ok(s) => s.to_owned(),
Err(_) => format!("{:?}", self.param),
};
f.debug_struct("AttrBytes")
.field("attr_name", &attr_name)
.field("param", ¶m)
.finish()
}
}
/// UTF-8 view of an [`AttrBytes`] entry.
#[derive(Debug, PartialEq, Eq)]
pub struct AttrStr<'a> {
/// Key as written in the input, including any `[param]` suffix.
pub name: &'a str,
/// Value text; either borrowed from the input or an owned string.
pub value: Cow<'a, str>,
/// Base name and parameter split out of `name`, when present.
pub param: Option<ParamStr<'a>>,
}
/// UTF-8 view of a [`ParamBytes`] value.
#[derive(Debug, PartialEq, Eq)]
pub struct ParamStr<'a> {
/// Text found after the `[` of the key.
pub param: &'a str,
/// Text found before the `[` of the key.
pub attr_name: &'a str,
}
/// UTF-8 view of a [`SectionBytes`] value.
#[derive(Debug, PartialEq, Eq)]
pub struct SectionStr<'a> {
/// Text between the `[` and `]` of the header.
pub title: &'a str,
/// Entries of the section, in input order.
pub attrs: Vec<AttrStr<'a>>,
}
/// Borrows `input` as UTF-8 text.
///
/// # Errors
/// Returns `ParseError::Utf8Error` carrying a copy of `input` and the
/// decoding error when the bytes are not valid UTF-8.
#[inline]
fn parse_str(input: &[u8]) -> Result<&str, ParseError> {
    match std::str::from_utf8(input) {
        Ok(text) => Ok(text),
        Err(source) => Err(ParseError::Utf8Error {
            bytes: input.to_vec(),
            source,
        }),
    }
}
/// Converts an owned byte buffer into a `String` without copying.
///
/// # Errors
/// Returns `ParseError::Utf8Error` carrying the original buffer and the
/// decoding error when the bytes are not valid UTF-8.
#[inline]
fn parse_string(input: Vec<u8>) -> Result<String, ParseError> {
    match String::from_utf8(input) {
        Ok(text) => Ok(text),
        Err(failure) => {
            // Read the error first: `into_bytes` consumes `failure`.
            let source = failure.utf8_error();
            let bytes = failure.into_bytes();
            Err(ParseError::Utf8Error { bytes, source })
        }
    }
}
impl<'a> TryFrom<AttrBytes<'a>> for AttrStr<'a> {
type Error = ParseError;
fn try_from(value: AttrBytes<'a>) -> Result<Self, Self::Error> {
Ok(Self {
name: parse_str(value.name)?,
value: match value.value {
Cow::Borrowed(s) => Cow::Borrowed(parse_str(s)?),
Cow::Owned(s) => Cow::Owned(parse_string(s)?),
},
param: match value.param {
Some(param) => Some(param.try_into()?),
None => None,
},
})
}
}
/// Converts a byte-level key parameter into its UTF-8 form.
///
/// `param` is validated before `attr_name`; the first invalid field
/// determines the returned error.
impl<'a> TryFrom<ParamBytes<'a>> for ParamStr<'a> {
    type Error = ParseError;

    fn try_from(value: ParamBytes<'a>) -> Result<Self, Self::Error> {
        let param = parse_str(value.param)?;
        let attr_name = parse_str(value.attr_name)?;
        Ok(Self { param, attr_name })
    }
}
impl<'a> TryFrom<SectionBytes<'a>> for SectionStr<'a> {
type Error = ParseError;
fn try_from(value: SectionBytes<'a>) -> Result<Self, Self::Error> {
Ok(Self {
title: parse_str(value.title)?,
attrs: value
.attrs
.into_iter()
.map(|attrs| attrs.try_into())
.collect::<Result<Vec<AttrStr<'a>>, Self::Error>>()?,
})
}
}
/// Returns `true` unless `c` is a space, tab, carriage return or line feed.
fn not_whitespace(c: u8) -> bool {
    !matches!(c, b' ' | b'\t' | b'\r' | b'\n')
}
/// Parses a `[title]` header at the start of `input` and returns the title.
///
/// The title must be non-empty and may not contain `[` or `]`.
fn header(input: &[u8]) -> IResult<&[u8], &[u8]> {
    let is_bracket = |c: u8| matches!(c, b'[' | b']');
    delimited(tag("["), take_till1(is_bracket), tag("]")).parse(input)
}
/// Returns `true` when the first byte of `input` is `#` or `;`.
fn comment_line(input: &[u8]) -> bool {
    matches!(input.first(), Some(b'#' | b';'))
}
/// Skips whitespace (including newlines) and whole comment lines, returning
/// the input positioned at the next byte of real content.
///
/// Returns an empty slice when nothing but whitespace and comments remain.
/// Implemented as a loop so that a long run of comment lines does not grow
/// the call stack.
fn next_line(
    input: &[u8],
) -> Result<&[u8], nom::Err<nom::error::Error<&[u8]>>> {
    let mut rem = input;
    loop {
        if rem.is_empty() {
            return Ok(b"");
        }
        let (after_ws, _) = take_till(not_whitespace)(rem)?;
        if !comment_line(after_ws) {
            return Ok(after_ws);
        }
        // Drop the comment up to (not including) its newline; the next
        // iteration consumes the newline as ordinary whitespace.
        let (after_comment, _) = take_till(|c| c == b'\n')(after_ws)?;
        rem = after_comment;
    }
}
/// Splits `input` at the first `[`: returns the remainder starting at that
/// byte and the skipped prefix. If there is no `[`, the remainder is empty.
fn find_start(input: &[u8]) -> IResult<&[u8], &[u8]> {
    let is_header_open = |c: u8| c == b'[';
    take_till(is_header_open)(input)
}
/// Returns `input` without its leading whitespace bytes.
///
/// An input made only of whitespace yields an empty slice.
fn trim_whitespace_front(input: &[u8]) -> &[u8] {
    let mut rest = input;
    while let [first, tail @ ..] = rest {
        if not_whitespace(*first) {
            break;
        }
        rest = tail;
    }
    rest
}
/// Returns `input` without its trailing whitespace bytes.
///
/// An input made only of whitespace yields an empty slice.
fn trim_whitespace_back(input: &[u8]) -> &[u8] {
    let mut rest = input;
    while let [head @ .., last] = rest {
        if not_whitespace(*last) {
            break;
        }
        rest = head;
    }
    rest
}
/// Splits a key such as `Name[en]` into its base name and parameter.
///
/// Fails when `input` contains no `[`. The parameter is everything after the
/// `[` up to the first `]` (or the end of `input` if there is none); the
/// returned remainder starts at that `]`.
fn params(input: &[u8]) -> IResult<&[u8], ParamBytes<'_>> {
    let mut name_then_bracket = terminated(take_till(|c: u8| c == b'['), tag("["));
    let (after_bracket, attr_name) = name_then_bracket.parse(input)?;
    let (rem, param) = take_till(|c: u8| c == b']')(after_bracket)?;
    Ok((rem, ParamBytes { param, attr_name }))
}
/// Tells the value parser whether the line it just read finishes the value.
#[derive(PartialEq, Eq)]
enum LineCont {
/// The value is complete.
End,
/// A continuation backslash was found; the value goes on in the following input.
Cont,
}
fn value_line(input: &[u8]) -> IResult<&[u8], (LineCont, &[u8])> {
let (rem, line) = take_till(|c| c == b'\n' || c == b'\\')(input)?;
match rem.first() {
Some(b'\\') => Ok((next_line(&rem[1..])?, (LineCont::Cont, line))),
_ => Ok((next_line(rem)?, (LineCont::End, line))),
}
}
/// Parses an entry's value, joining continuation lines with a single space.
///
/// Leading whitespace of the first line is removed. A single-line value is
/// returned borrowed from `input`; a continued value is assembled into an
/// owned buffer.
fn value(input: &[u8]) -> IResult<&[u8], Cow<'_, [u8]>> {
    let (mut rest, (mut state, first)) = value_line(input)?;
    let first = trim_whitespace_front(first);
    if state == LineCont::End {
        return Ok((rest, Cow::Borrowed(first)));
    }

    let mut joined = first.to_vec();
    while state == LineCont::Cont {
        // One space stands in for each continuation break.
        joined.push(b' ');
        let (after, (next_state, part)) = value_line(rest)?;
        joined.extend_from_slice(part);
        rest = after;
        state = next_state;
    }
    Ok((rest, Cow::Owned(joined)))
}
/// Parses one `name = value` entry and advances to the next content line.
///
/// The name is everything before the `=` on the current line, with trailing
/// whitespace removed. If the name has the form `base[param]`, the split is
/// also reported in `param`.
///
/// # Errors
/// Fails when `input` starts with `[` (the next section header) or when the
/// current line contains no `=`.
fn attr(input: &[u8]) -> IResult<&[u8], AttrBytes<'_>> {
    // A leading `[` belongs to the next header, not to an entry.
    if input.first() == Some(&b'[') {
        return Err(nom::Err::Error(nom::error::Error::new(
            input,
            ErrorKind::Complete,
        )));
    }
    // The key must sit on a single line: stopping at `\n` as well as `=`
    // keeps a line without `=` from absorbing the following lines (and any
    // section header among them) into the key name.
    let (rem, name) =
        terminated(take_till(|c| c == b'=' || c == b'\n'), tag("=")).parse(input)?;
    let name = trim_whitespace_back(name);
    let (rem, value) = value(rem)?;
    Ok((
        next_line(rem)?,
        AttrBytes {
            name,
            value,
            param: params(name).ok().map(|(_, param)| param),
        },
    ))
}
/// Parses a header followed by one or more entries.
///
/// # Errors
/// Fails when `input` does not start with a valid header or when no entry
/// follows it.
fn section(input: &[u8]) -> IResult<&[u8], SectionBytes<'_>> {
    let (after_header, title) = header(input)?;
    let body = next_line(after_header)?;
    many1(attr)
        .parse(body)
        .map(|(rem, attrs)| (rem, SectionBytes { title, attrs }))
}
/// Iterator that parses sections from a byte buffer one at a time.
pub struct SectionBytesIter<'a> {
// Input that has not been parsed yet.
rem: &'a [u8],
// Set to true once a header search has run.
// NOTE(review): written but never read in the code visible here.
found_start: bool,
// Set after an error has been yielded; the iterator then stops.
error: bool,
}
impl<'a> SectionBytesIter<'a> {
    /// Skips ahead to the next `[` and parses the section starting there.
    ///
    /// On failure the iterator stays positioned at the point where the
    /// header search stopped.
    fn next_section(&mut self) -> Result<SectionBytes<'a>, ParseError> {
        let (at_header, _skipped) = find_start(self.rem)?;
        self.rem = at_header;
        self.found_start = true;

        let (rest, parsed) = section(at_header)?;
        self.rem = rest;
        Ok(parsed)
    }
}
/// Yields one parsed section per call. Iteration ends when the input is
/// exhausted or right after the first error has been yielded.
impl<'a> Iterator for SectionBytesIter<'a> {
    type Item = Result<SectionBytes<'a>, ParseError>;

    fn next(&mut self) -> Option<Self::Item> {
        let finished = self.error || self.rem.is_empty();
        if finished {
            return None;
        }
        let outcome = self.next_section();
        // `error` is known to be false here, so only a failure changes it.
        if outcome.is_err() {
            self.error = true;
        }
        Some(outcome)
    }
}
/// Creates an iterator over the sections of `input`, yielding byte-level
/// sections.
///
/// Parsing is lazy: nothing is read until the iterator is advanced.
pub fn parse_entry(input: &[u8]) -> SectionBytesIter<'_> {
    let (rem, found_start, error) = (input, false, false);
    SectionBytesIter {
        rem,
        found_start,
        error,
    }
}
/// Iterator that yields sections converted to UTF-8 text.
pub struct SectionStrIter<'a> {
// Byte-level iterator whose items are converted on the fly.
internal: SectionBytesIter<'a>,
}
/// Yields each section of the underlying byte iterator converted to UTF-8.
/// Parse errors are passed through; conversion failures become errors too.
impl<'a> Iterator for SectionStrIter<'a> {
    type Item = Result<SectionStr<'a>, ParseError>;

    fn next(&mut self) -> Option<Self::Item> {
        let parsed = self.internal.next()?;
        Some(parsed.and_then(SectionStr::try_from))
    }
}
pub fn parse_entry_str(input: &[u8]) -> SectionStrIter<'_> {
SectionStrIter {
internal: SectionBytesIter {
rem: input,
found_start: false,
error: false,
},
}
}
#[cfg(test)]
mod test {
use super::*;
// Leading space and tab are removed.
#[test]
fn trim_front() {
assert_eq!(trim_whitespace_front(b" \ttest"), &b"test"[..])
}
// Trailing space and tab are removed.
#[test]
fn trim_back() {
assert_eq!(trim_whitespace_back(b"test \t"), &b"test"[..])
}
// Input without leading whitespace is returned unchanged.
#[test]
fn trim_front_none() {
assert_eq!(trim_whitespace_front(b"test"), &b"test"[..])
}
// Input without trailing whitespace is returned unchanged.
#[test]
fn trim_back_none() {
assert_eq!(trim_whitespace_back(b"test"), &b"test"[..])
}
// Tests for `header`: a well-formed title and the three bracket failures.
mod fn_header {
use super::*;
// A complete `[title]` yields the title and consumes everything.
#[test]
fn ok() {
assert_eq!(header(b"[hello]"), Ok((&b""[..], &b"hello"[..])));
}
// Missing opening bracket fails on the first tag.
#[test]
fn no_start() {
assert_eq!(
header(b"hello").unwrap_err(),
nom::Err::Error(nom::error::make_error(
&b"hello"[..],
ErrorKind::Tag
))
);
}
// Missing closing bracket fails at end of input.
#[test]
fn no_end() {
assert_eq!(
header(b"[hello").unwrap_err(),
nom::Err::Error(nom::error::make_error(
&b""[..],
ErrorKind::Tag
))
);
}
// A second `[` inside the title is rejected at that bracket.
#[test]
fn double_start_bracket() {
assert_eq!(
header(b"[h[ello]").unwrap_err(),
nom::Err::Error(nom::error::make_error(
&b"[ello]"[..],
ErrorKind::Tag
))
);
}
}
// Tests for `next_line`: whitespace and comment skipping.
mod fn_next_line {
use super::*;
// Empty input stays empty.
#[test]
fn empty() {
assert_eq!(next_line(b""), Ok(&b""[..]));
}
// Spaces, tabs and both newline styles are skipped.
#[test]
fn only_whitespace() {
assert_eq!(next_line(b" \t \t\n\r\nhello"), Ok(&b"hello"[..]));
}
// A `#` comment line is skipped together with surrounding whitespace.
#[test]
fn comment() {
assert_eq!(
next_line(b" \t\n# Comment\nhello"),
Ok(&b"hello"[..])
);
}
// Input that already starts with content is returned as is.
#[test]
fn no_change() {
assert_eq!(next_line(b"hello\n"), Ok(&b"hello\n"[..]));
}
}
// Tests for `attr`: plain entries, parameterised keys and edge cases.
mod fn_attr {
use super::*;
// Simple `name=value`.
#[test]
fn ok() {
assert_eq!(
attr(b"hello=world"),
Ok((
&b""[..],
AttrBytes {
name: &b"hello"[..],
value: b"world"[..].into(),
param: None,
}
))
);
}
// `name[param]=value` keeps the full name and also reports the split.
#[test]
fn with_param() {
assert_eq!(
attr(b"hello[en]=world"),
Ok((
&b""[..],
AttrBytes {
name: &b"hello[en]"[..],
value: b"world"[..].into(),
param: Some(ParamBytes {
attr_name: &b"hello"[..],
param: &b"en"[..]
}),
}
))
);
}
// Spaces inside the value are preserved.
#[test]
fn space_in_value() {
assert_eq!(
attr(b"hello=world today"),
Ok((
&b""[..],
AttrBytes {
name: &b"hello"[..],
value: b"world today"[..].into(),
param: None,
}
))
);
}
// An empty value is accepted.
#[test]
fn no_value() {
assert_eq!(
attr(b"hello="),
Ok((
&b""[..],
AttrBytes {
name: &b"hello"[..],
value: b""[..].into(),
param: None,
}
))
);
}
// An empty name is accepted.
#[test]
fn no_name() {
assert_eq!(
attr(b"=world"),
Ok((
&b""[..],
AttrBytes {
name: &b""[..],
value: b"world"[..].into(),
param: None,
}
))
);
}
// A line without `=` is an error.
#[test]
fn no_eq() {
assert_eq!(
attr(b"hello"),
Err(nom::Err::Error(nom::error::Error {
input: &b""[..],
code: ErrorKind::Tag
}))
);
}
// Whitespace around `=` is dropped from both name and value.
#[test]
fn whitespace() {
assert_eq!(
attr(b"hello = world today"),
Ok((
&b""[..],
AttrBytes {
name: &b"hello"[..],
value: b"world today"[..].into(),
param: None,
}
))
);
}
}
// Tests for `section`: a full section and the two structural failures.
mod fn_section {
use super::*;
// Header plus two entries.
#[test]
fn ok() {
assert_eq!(
section(b"[apps]\nSize=48\nScale=1"),
Ok((
&b""[..],
SectionBytes {
title: &b"apps"[..],
attrs: vec![
AttrBytes {
name: &b"Size"[..],
value: b"48"[..].into(),
param: None,
},
AttrBytes {
name: &b"Scale"[..],
value: b"1"[..].into(),
param: None,
}
]
}
))
);
}
// A header with no entries is an error.
#[test]
fn no_attrs() {
assert_eq!(
section(b"[apps]\n"),
Err(nom::Err::Error(nom::error::Error {
input: &b""[..],
code: ErrorKind::Tag
}))
);
}
// Entries without a preceding header are an error.
#[test]
fn no_header() {
assert_eq!(
section(b"Size=48\nScale=1"),
Err(nom::Err::Error(nom::error::Error {
input: &b"Size=48\nScale=1"[..],
code: ErrorKind::Tag
}))
);
}
}
mod fn_value {
use super::*;
#[test]
fn single_line() {
assert_eq!(value(b"value\n"), Ok((&b""[..], b"value"[..].into())))
}
#[test]
fn two_line() {
assert_eq!(
value(b"value\\\nvalue2\n"),
Ok((&b""[..], b"value value2"[..].into()))
)
}
#[test]
fn three_line() {
assert_eq!(
value(b"value\\\nvalue2\\\nvalue3\n"),
Ok((&b""[..], b"value value2 value3"[..].into()))
)
}
#[test]
fn three_line_one_comment() {
assert_eq!(
value(b"value\\\nvalue2\\\n# commnet\nvalue3\n"),
Ok((&b""[..], b"value value2 value3"[..].into()))
)
}
#[test]
fn three_line_two_comment() {
assert_eq!(
value(b"value\\\nvalue2\\\n# commnet\n; comment 2\nvalue3\n"),
Ok((&b""[..], b"value value2 value3"[..].into()))
)
}
}
// Parses the bundled icon theme index and spot-checks one section.
#[test]
fn parse_icon_index() {
let input = include_bytes!("./../test_data/gnome-index.theme");
let sections = parse_entry(input)
.collect::<Result<Vec<_>, _>>()
.expect("Error parsing input");
assert_eq!(sections.len(), 68);
assert_eq!(sections[50].title, &b"48x48/status"[..]);
assert_eq!(sections[50].attrs[1].name, &b"Size"[..]);
assert_eq!(sections[50].attrs[1].value, &b"48"[..]);
}
// Parses the bundled desktop entry and checks a plain and a localized key.
#[test]
fn parse_firefox_desktop_entry() {
let input = include_bytes!("./../test_data/firefox.desktop");
let sections = parse_entry(input)
.collect::<Result<Vec<_>, _>>()
.expect("Error parsing input");
assert_eq!(sections.len(), 3);
assert_eq!(
sections[0].attrs[1],
AttrBytes {
name: &b"Name"[..],
value: b"Firefox"[..].into(),
param: None,
}
);
assert_eq!(
sections[0].attrs[4],
AttrBytes {
name: &b"GenericName[ast]"[..],
value: b"Restolador Web"[..].into(),
param: Some(ParamBytes {
attr_name: &b"GenericName"[..],
param: &b"ast"[..]
}),
}
);
}
// Parses the bundled systemd unit and checks the section count.
#[test]
fn parse_sshd_systemd_unit() {
let input = include_bytes!("./../test_data/sshd.service");
let sections = parse_entry(input)
.collect::<Result<Vec<_>, _>>()
.expect("Error parsing input");
assert_eq!(sections.len(), 3);
}
// Parses the bundled edge-case file: quoted values, non-ASCII bytes and
// continued values.
#[test]
fn parse_systemd_test() {
let input = include_bytes!("./../test_data/edge-cases.txt");
let sections = parse_entry(input)
.collect::<Result<Vec<_>, _>>()
.expect("Error parsing input");
assert_eq!(sections.len(), 3);
assert_eq!(sections[0].title, &b"Section A"[..]);
assert_eq!(sections[0].attrs[0].name, &b"KeyOne"[..]);
assert_eq!(sections[0].attrs[0].value, &b"value 1"[..]);
assert_eq!(sections[0].attrs[1].name, &b"KeyTwo"[..]);
assert_eq!(sections[0].attrs[1].value, &b"value 2"[..]);
assert_eq!(sections[1].title, &b"Section B"[..]);
assert_eq!(sections[1].attrs[0].name, &b"Setting"[..]);
assert_eq!(sections[1].attrs[0].value, &b"\"something\" \"some thing\" \"\xE2\x80\xA6\""[..]);
assert_eq!(sections[1].attrs[1].name, &b"KeyTwo"[..]);
assert_eq!(sections[1].attrs[1].value, &b"value 2 value 2 continued"[..]);
assert_eq!(sections[2].title, &b"Section C"[..]);
assert_eq!(sections[2].attrs[0].name, &b"KeyThree"[..]);
assert_eq!(sections[2].attrs[0].value, &b"value 3 value 3 continued"[..]);
}
}