yatima_core/parse/
string.rs

1////! Adapted from the examples in the nom repository
2////! https://github.com/Geal/nom/blob/master/examples/string.rs
3////! which is licensed under the following MIT license:
4////! https://github.com/Geal/nom/blob/master/LICENSE
5
6use crate::parse::{
7  error::{
8    ParseError,
9    ParseErrorKind,
10  },
11  span::Span,
12};
13
14use nom::{
15  branch::alt,
16  bytes::complete::{
17    take_till1,
18    take_while_m_n,
19  },
20  character::complete::{
21    char,
22    multispace1,
23  },
24  combinator::{
25    map,
26    value,
27    verify,
28  },
29  error::context,
30  multi::fold_many0,
31  sequence::preceded,
32  Err,
33  IResult,
34};
35
36use sp_std::borrow::ToOwned;
37
38use alloc::string::String;
39
40pub fn parse_codepoint(from: Span) -> IResult<Span, char, ParseError<Span>> {
41  let (i, _) =
42    context("the character 'u' to begin a unicode codepoint", char('u'))(from)?;
43  let (i, _) =
44    context("the open brace '{' of a unicode codepoint", char('{'))(i)?;
45  let (i, s) = context(
46    "between 1 to 6 hexadecimal digits to indicate a unicode codepoint",
47    take_while_m_n(1, 6, |c: char| c.is_ascii_hexdigit()),
48  )(i)?;
49  let (i, _) =
50    context("the close brace '}' of a unicode codepoint", char('}'))(i)?;
51  let s = s.fragment();
52  match u32::from_str_radix(s, 16) {
53    Ok(x) => match char::from_u32(x) {
54      Some(c) => Ok((i, c)),
55      _ => Err(Err::Error(ParseError::new(
56        i,
57        ParseErrorKind::InvalidBase16EscapeSequence(String::from(s.to_owned())),
58      ))),
59    },
60    Err(e) => {
61      Err(Err::Error(ParseError::new(i, ParseErrorKind::ParseIntErr(e))))
62    }
63  }
64}
65pub fn parse_escape(i: Span) -> IResult<Span, char, ParseError<Span>> {
66  let (i, _) = context(
67    "the backslash '\\' to begin a string escape sequence",
68    char('\\'),
69  )(i)?;
70  alt((
71    parse_codepoint,
72    value('\n', char('n')),
73    value('\r', char('r')),
74    value('\t', char('t')),
75    value('\u{08}', char('b')),
76    value('\u{0C}', char('f')),
77    value('\\', char('\\')),
78    value('"', char('"')),
79    value('\'', char('\'')),
80  ))(i)
81}
82
83fn parse_escaped_whitespace(
84  from: Span,
85) -> IResult<Span, Span, ParseError<Span>> {
86  preceded(
87    context(
88      "the backslash '\\' to begin string escaped whitespace",
89      char('\\'),
90    ),
91    multispace1,
92  )(from)
93}
94
95fn parse_literal(
96  halt: &'static str,
97) -> impl Fn(Span) -> IResult<Span, Span, ParseError<Span>> {
98  move |from: Span| {
99    let p = |c| halt.chars().any(|x| (x == c) | (c == '\\'));
100    verify(take_till1(p), |s: &Span| !s.fragment().is_empty())(from)
101  }
102}
103#[derive(Debug, Clone, Copy, PartialEq, Eq)]
104enum StringFragment<'a> {
105  Literal(Span<'a>),
106  EscapedChar(char),
107  EscapedWS,
108}
109
110fn parse_fragment<'a>(
111  halt: &'static str,
112) -> impl Fn(Span<'a>) -> IResult<Span<'a>, StringFragment<'a>, ParseError<Span<'a>>>
113{
114  move |from: Span<'a>| {
115    alt((
116      map(
117        context("string literal", parse_literal(halt)),
118        StringFragment::Literal,
119      ),
120      map(context("string escape", parse_escape), StringFragment::EscapedChar),
121      value(
122        StringFragment::EscapedWS,
123        context("escaped whitespace within a string", parse_escaped_whitespace),
124      ),
125    ))(from)
126  }
127}
128
129pub fn parse_string<'a>(
130  halt: &'static str,
131) -> impl Fn(Span<'a>) -> IResult<Span<'a>, String, ParseError<Span<'a>>> {
132  move |from: Span<'a>| {
133    fold_many0(parse_fragment(halt), String::new(), |mut string, fragment| {
134      match fragment {
135        StringFragment::Literal(s) => string.push_str(s.fragment()),
136        StringFragment::EscapedChar(c) => string.push(c),
137        StringFragment::EscapedWS => {}
138      }
139      string
140    })(from)
141  }
142}