#![no_std]
extern crate alloc;
extern crate core;
use alloc::borrow::Cow;
use core::ops::RangeFrom;
use nom::bytes::streaming::{tag, take_till1, take_while_m_n};
use nom::character::streaming::{char, multispace1};
use nom::combinator::{flat_map, map, opt, value};
use nom::error::{ErrorKind as nomErrKind, ParseError};
use nom::sequence::{delimited, preceded};
use nom::{
branch::alt, multi::fold_many0, AsBytes, AsChar, Compare, IResult, InputIter, InputLength,
InputTake, Slice,
};
mod raw;
pub use raw::parse_raw_string;
/// Combined bound naming every `nom` input capability the parsers in this
/// module rely on.
///
/// The trait declares no items of its own; it exists only so that parser
/// signatures can write `I: MyInput` instead of repeating the whole list.
pub trait MyInput
where
    Self: AsBytes
        + Clone
        + InputIter
        + InputTake
        + InputLength
        + Slice<RangeFrom<usize>>
        + nom::InputTakeAtPosition
        + for<'a> Compare<&'a str>,
{
}
// Blanket implementation: any type that already satisfies every required
// `nom` input trait is a `MyInput` without further work.
impl<T> MyInput for T
where
    T: AsBytes
        + Clone
        + InputIter
        + InputTake
        + InputLength
        + Slice<RangeFrom<usize>>
        + nom::InputTakeAtPosition
        + for<'a> Compare<&'a str>,
{
}
#[inline]
fn nome_from_error_kind<I, E>(input: I, kind: nomErrKind) -> nom::Err<E>
where
E: ParseError<I>,
{
nom::Err::Error(E::from_error_kind(input, kind))
}
/// Parses the `u{H…}` part of a unicode escape, where `H…` is 1 to 6 hex digits,
/// and returns the corresponding `char`.
///
/// # Errors
///
/// * Whatever the underlying `tag` / `take_while_m_n` / `char` parsers report
///   when the input does not have the `u{…}` shape.
/// * `ErrorKind::MapRes` when the matched digits cannot be read as a
///   hexadecimal `u32`. This includes the case where a custom input type hands
///   back bytes that are not valid UTF-8; that is reported as an error rather
///   than a panic.
/// * `ErrorKind::MapOpt` when the number is not a valid `char`
///   (`core::char::from_u32` returned `None`).
///
/// Both `MapRes` and `MapOpt` errors carry the input as it was on entry.
fn parse_unicode<I, E>(input: I) -> IResult<I, char, E>
where
    E: ParseError<I>,
    I: MyInput,
    <I as nom::InputIter>::Item: AsChar,
{
    // Remember the untouched input so conversion errors point at the escape start.
    let start = input.clone();
    let (rest, hex) = delimited(
        tag("u{"),
        take_while_m_n(1, 6, AsChar::is_hex_digit),
        char('}'),
    )(input)?;

    // Bytes -> str -> u32. Either step failing is a `MapRes` error; no unwrap,
    // so a misbehaving input type cannot make the parser panic.
    let code = match core::str::from_utf8(hex.as_bytes())
        .ok()
        .and_then(|digits| u32::from_str_radix(digits, 16).ok())
    {
        Some(code) => code,
        None => return Err(nome_from_error_kind(start, nomErrKind::MapRes)),
    };

    match core::char::from_u32(code) {
        Some(ch) => Ok((rest, ch)),
        None => Err(nome_from_error_kind(start, nomErrKind::MapOpt)),
    }
}
/// Decodes a single-character escape code from the front of `input`.
///
/// Recognised codes and their results:
/// `n` → `'\n'`, `r` → `'\r'`, `t` → `'\t'`, `b` → U+0008, `f` → U+000C,
/// and `\` / `/` which map to themselves.
///
/// Returns `nom::Err::Incomplete(Needed::Size(1))` when `input` yields no
/// element, and an `ErrorKind::OneOf` error (carrying the unconsumed input)
/// for any other element. On success the remaining input starts right after
/// the consumed element.
fn parse_escaped_char<I, E>(input: I) -> IResult<I, char, E>
where
    E: ParseError<I>,
    I: InputIter + Slice<RangeFrom<usize>>,
    <I as nom::InputIter>::Item: AsChar + Copy,
{
    let first = input.iter_elements().next();
    let code = match first {
        Some(item) => item,
        None => return Err(nom::Err::Incomplete(nom::Needed::Size(1))),
    };

    let decoded = match code.as_char() {
        'n' => '\n',
        'r' => '\r',
        't' => '\t',
        'b' => '\u{08}',
        'f' => '\u{0C}',
        '\\' => '\\',
        '/' => '/',
        _ => return Err(nome_from_error_kind(input, nomErrKind::OneOf)),
    };

    // Skip exactly the element that was decoded.
    let consumed = code.len();
    Ok((input.slice(consumed..), decoded))
}
/// Builds a parser for a literal run: it is a `take_till1` whose stop
/// condition is "the element is a backslash or `delim`".
fn parse_literal<I, E>(delim: char) -> impl Fn(I) -> IResult<I, I, E>
where
    E: ParseError<I>,
    I: MyInput,
    <I as nom::InputTakeAtPosition>::Item: AsChar,
{
    take_till1(move |item: <I as nom::InputTakeAtPosition>::Item| {
        let c = item.as_char();
        c == '\\' || c == delim
    })
}
/// One piece of a delimited string's body, as produced by `parse_fragment`
/// and consumed by `fragment_fold`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum StringFragment<I> {
    /// A run of input matched by `parse_literal`; its bytes are copied verbatim.
    Literal(I),
    /// A character decoded from a backslash escape.
    EscapedChar(char),
    /// A backslash followed by whitespace; contributes nothing to the output.
    EscapedWS,
}
/// Builds a parser for one [`StringFragment`] of a string delimited by `delim`.
///
/// Alternatives are tried in this order:
/// 1. a literal run (`parse_literal`) → `StringFragment::Literal`;
/// 2. a backslash followed by either
///    * a unicode escape, a single-character escape, or `delim` itself
///      → `StringFragment::EscapedChar`, or
///    * one or more whitespace characters (`multispace1`)
///      → `StringFragment::EscapedWS`.
fn parse_fragment<I, E>(delim: char) -> impl Fn(I) -> IResult<I, StringFragment<I>, E>
where
    E: ParseError<I>,
    I: MyInput,
    <I as nom::InputIter>::Item: AsChar + Copy,
    <I as nom::InputTakeAtPosition>::Item: AsChar + Copy,
{
    let literal = map(parse_literal(delim), StringFragment::Literal);

    // What may follow the backslash and yields a character.
    let escaped_char = map(
        alt((parse_unicode, parse_escaped_char, char(delim))),
        StringFragment::EscapedChar,
    );
    // What may follow the backslash and yields nothing.
    let escaped_ws = value(StringFragment::EscapedWS, multispace1);

    let escape = preceded(char('\\'), alt((escaped_char, escaped_ws)));

    alt((literal, escape))
}
/// Appends one fragment to the accumulated string and returns the accumulator.
///
/// * `Literal` — its bytes are appended.
/// * `EscapedChar` — the UTF-8 encoding of the character is appended.
/// * `EscapedWS` — nothing is appended, and the accumulator is not converted
///   to its owned form.
fn fragment_fold<'i, I: AsBytes + ?Sized + 'i>(
    mut string: Cow<'i, [u8]>,
    fragment: StringFragment<&'i I>,
) -> Cow<'i, [u8]> {
    match fragment {
        StringFragment::Literal(text) => {
            string.to_mut().extend_from_slice(text.as_bytes());
        }
        StringFragment::EscapedChar(c) => {
            // A `char` encodes to at most 4 bytes of UTF-8.
            let mut utf8 = [0u8; 4];
            let len = c.encode_utf8(&mut utf8).len();
            string.to_mut().extend_from_slice(&utf8[..len]);
        }
        StringFragment::EscapedWS => {}
    }
    string
}
/// Builds a parser for a string enclosed in a pair of `delim` characters.
///
/// The body consists of an optional leading literal run followed by any number
/// of fragments (literal runs and backslash escapes, see `parse_fragment`).
/// The parsed content is returned as bytes. The accumulator starts out
/// borrowing the leading literal run (or an empty slice) and only becomes
/// owned once a further literal or escaped character has to be appended.
///
/// The parser is built from `nom`'s streaming combinators.
///
/// # Panics
///
/// In builds with debug assertions enabled, panics if `delim` is a backslash.
pub fn parse_string<'i, I, E>(delim: char) -> impl Fn(&'i I) -> IResult<&'i I, Cow<'i, [u8]>, E>
where
    E: ParseError<&'i I>,
    I: AsBytes + ?Sized + 'i,
    &'i I: MyInput + PartialEq,
    <&'i I as nom::InputIter>::Item: AsChar + Copy,
    <&'i I as nom::InputTakeAtPosition>::Item: AsChar + Copy,
{
    debug_assert!(delim != '\\');

    // Literal text directly after the opening delimiter can be borrowed as-is.
    let leading_literal = opt(map(parse_literal(delim), I::as_bytes));

    let body = flat_map(leading_literal, move |init: Option<&'i [u8]>| {
        let seed: &'i [u8] = init.unwrap_or_default();
        fold_many0(parse_fragment(delim), Cow::Borrowed(seed), fragment_fold)
    });

    delimited(char(delim), body, char(delim))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Views a parse output `(remaining input, parsed bytes)` as two plain
    /// byte slices so it can be compared against literals.
    fn as_slices<'a>(out: &'a (&'a [u8], Cow<'a, [u8]>)) -> (&'a [u8], &'a [u8]) {
        let (rest, parsed) = out;
        (*rest, &**parsed)
    }

    #[test]
    fn test0() {
        let parser = parse_string::<_, ()>('"');

        // A body without escapes parses completely and stays borrowed.
        let plain = parser(b"\"abc\"".as_ref());
        assert_eq!(
            plain.as_ref().map(as_slices),
            Ok(("".as_bytes(), "abc".as_bytes()))
        );
        match plain.unwrap().1 {
            Cow::Borrowed(_) => {}
            Cow::Owned(_) => unreachable!(),
        }

        // A body exercising every kind of escape.
        let escaped_input: &[u8] = b"\"tab:\\tafter tab, newline:\\nnew line, quote: \\\", emoji: \\u{1F602}, newline:\\nescaped whitespace: \\ abc\"";
        let escaped = parser(escaped_input);
        assert_eq!(
            escaped.as_ref().map(as_slices),
            Ok((
                "".as_bytes(),
                "tab:\tafter tab, newline:\nnew line, quote: \", emoji: 😂, newline:\nescaped whitespace: abc".as_bytes()
            ))
        );
    }
}