thrift_parser/
basic.rs

1use nom::branch::alt;
2use nom::bytes::complete::{tag, take_till, take_until, take_while};
3use nom::character::complete::{char as cchar, multispace1, one_of, satisfy};
4use nom::combinator::{map, opt, recognize};
5use nom::multi::many1;
6use nom::sequence::{delimited, preceded, tuple};
7use nom::IResult;
8
9use crate::Parser;
10
11// Literal         ::=  ('"' [^"]* '"') | ("'" [^']* "'")
12#[derive(derive_newtype::NewType, Eq, PartialEq, Debug, Clone)]
13pub struct LiteralRef<'a>(&'a str);
14
15impl<'a> Parser<'a> for LiteralRef<'a> {
16    fn parse(input: &'a str) -> IResult<&'a str, Self> {
17        map(
18            alt((
19                delimited(cchar('"'), take_until("\""), cchar('"')),
20                delimited(cchar('\''), take_until("'"), cchar('\'')),
21            )),
22            Self,
23        )(input)
24    }
25}
26
27#[derive(derive_newtype::NewType, Eq, PartialEq, Debug, Clone)]
28pub struct Literal(String);
29
30impl<'a> From<LiteralRef<'a>> for Literal {
31    fn from(r: LiteralRef<'a>) -> Self {
32        Self(r.0.into())
33    }
34}
35
36impl<'a> Parser<'a> for Literal {
37    fn parse(input: &'a str) -> IResult<&'a str, Self> {
38        LiteralRef::parse(input).map(|(remains, parsed)| (remains, parsed.into()))
39    }
40}
41
42// Identifier      ::=  ( Letter | '_' ) ( Letter | Digit | '.' | '_' )*
43#[derive(derive_newtype::NewType, Eq, PartialEq, Debug, Clone)]
44pub struct IdentifierRef<'a>(&'a str);
45
46impl<'a> Parser<'a> for IdentifierRef<'a> {
47    fn parse(input: &'a str) -> IResult<&'a str, Self> {
48        map(
49            recognize(tuple((
50                opt(cchar('_')),
51                satisfy(|c| c.is_ascii_alphabetic()),
52                take_while(|c: char| c.is_ascii_alphanumeric() || c == '.' || c == '_'),
53            ))),
54            Self,
55        )(input)
56    }
57}
58
59#[derive(derive_newtype::NewType, Eq, PartialEq, Debug, Clone)]
60pub struct Identifier(String);
61
62impl<'a> From<IdentifierRef<'a>> for Identifier {
63    fn from(r: IdentifierRef<'a>) -> Self {
64        Self(r.0.into())
65    }
66}
67
68impl<'a> Parser<'a> for Identifier {
69    fn parse(input: &'a str) -> IResult<&'a str, Self> {
70        IdentifierRef::parse(input).map(|(remains, parsed)| (remains, parsed.into()))
71    }
72}
73
/// Marker for a list separator; carries no data.
///
/// Grammar: `ListSeparator ::= ',' | ';'`
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ListSeparator;
77
78impl<'a> Parser<'a> for ListSeparator {
79    fn parse(input: &'a str) -> IResult<&'a str, Self> {
80        map(one_of(",;"), |_: char| Self)(input)
81    }
82}
83
/// The text of a comment borrowed from the input, without its delimiters.
///
/// Two comment forms are recognized:
/// 1. A line comment starting with `//` or `#`.
/// 2. A block comment: the content between `/*` and `*/`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CommentRef<'a>(&'a str);
88
89impl<'a> Parser<'a> for CommentRef<'a> {
90    fn parse(input: &'a str) -> IResult<&'a str, Self> {
91        map(
92            alt((
93                preceded(tag("//"), take_till(|c| c == '\n')),
94                preceded(cchar('#'), take_till(|c| c == '\n')),
95                delimited(tag("/*"), take_until("*/"), tag("*/")),
96            )),
97            Self,
98        )(input)
99    }
100}
101
/// Owned counterpart of [`CommentRef`]: the comment text held in a `String`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Comment(String);
104
105impl<'a> From<CommentRef<'a>> for Comment {
106    fn from(r: CommentRef<'a>) -> Self {
107        Self(r.0.into())
108    }
109}
110
111impl<'a> Parser<'a> for Comment {
112    fn parse(input: &'a str) -> IResult<&'a str, Self> {
113        CommentRef::parse(input).map(|(remains, parsed)| (remains, parsed.into()))
114    }
115}
116
/// Marker for a separator between tokens; carries no data.
///
/// A separator is a run of one or more of:
/// 1. Comment
/// 2. Space
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Separator;
121
122impl<'a> Parser<'a> for Separator {
123    fn parse(input: &'a str) -> IResult<&'a str, Self> {
124        map(
125            many1(alt((
126                map(CommentRef::parse, |_| ()),
127                map(multispace1, |_| ()),
128            ))),
129            |_| Self,
130        )(input)
131    }
132}
133
#[cfg(test)]
mod test {
    use crate::utils::*;

    use super::*;

    /// Quoted literals yield their inner text; an unterminated literal is an error.
    #[test]
    fn test_literal() {
        let inputs = vec![
            "'ihciah'balabala",
            "'ihcia\"h'''''",
            "\"ihciah\"balabala",
            "\"ihcia'h\"''''",
        ];
        let expected = vec!["ihciah", "ihcia\"h", "ihciah", "ihcia'h"];
        assert_list_eq_with_f(inputs, expected, LiteralRef::parse, LiteralRef);

        let unterminated = vec!["'ihcia\"aa"];
        assert_list_err_with_f(unterminated, LiteralRef::parse);
    }

    /// Identifiers stop at the first non-identifier character; a letter is
    /// required after the optional leading underscore.
    #[test]
    fn test_identifier() {
        let inputs = vec!["_ihc123iah,", "ihc123iah,"];
        let expected = vec!["_ihc123iah", "ihc123iah"];
        assert_list_eq_with_f(inputs, expected, IdentifierRef::parse, IdentifierRef);

        let rejected = vec!["_123", "_", "123"];
        assert_list_err_with_f(rejected, IdentifierRef::parse);
    }

    /// Only `;` and `,` are list separators.
    #[test]
    fn test_list_separator() {
        for accepted in [";", ","] {
            assert!(ListSeparator::parse(accepted).is_ok());
        }
        assert!(ListSeparator::parse("a").is_err());
    }

    /// Each comment form yields its text without the comment markers.
    #[test]
    fn test_comment() {
        let inputs = vec![
            "//ihciah's #content",
            "//ihciah's #content balabala\nNextLine",
            "#ihciah's ///#content",
            "/*ihciah's con@#tent*///aaa",
        ];
        let expected = vec![
            "ihciah's #content",
            "ihciah's #content balabala",
            "ihciah's ///#content",
            "ihciah's con@#tent",
        ];
        assert_list_eq_with_f(inputs, expected, CommentRef::parse, CommentRef);
    }
}