html_query_ast/
parser.rs

1// This is the first parser I've written using nom. It's based heavily off this one from Nom:
2// https://github.com/Geal/nom/blob/3645656644e3ae5074b61cc57e3f62877ada9190/tests/json.rs
3
4use nom::bytes::complete::{take_while, take_while1};
5use nom::combinator::fail;
6use nom::error::{convert_error, VerboseError};
7use nom::multi::many_till;
8use nom::sequence::{pair, terminated};
9use nom::{
10    branch::alt,
11    bytes::complete::tag,
12    character::complete::{alphanumeric1, char, multispace0},
13    combinator::map,
14    error::ParseError,
15    multi::separated_list0,
16    sequence::{delimited, preceded, separated_pair},
17    IResult, Parser,
18};
19use scraper::Selector;
20use std::collections::HashMap;
21
/// A single step of a query, as produced by the `expression` parser.
///
/// The examples on each variant show the source syntax that yields it. How a
/// variant is evaluated against a document is not defined in this file.
#[derive(Debug, PartialEq)]
pub enum Expression {
    /// Written `@text`, e.g. `.foo | @text`.
    /// NOTE(review): presumably yields the node's text content; confirm against the evaluator.
    Text,
    /// Written `@(name)`, e.g. `.foo | @(href)`; carries the text found between the parentheses.
    Attribute(String),
    /// Written `@parent`.
    Parent,
    /// Written `@sibling(n)`, e.g. `@sibling(1)`; carries the parsed number.
    Sibling(usize),
    /// A CSS selector such as `.abc > def`: the parsed `Selector` together with
    /// the raw source text it was parsed from.
    Selector(Selector, String),
}
35
/// A node of the parsed query tree, as produced by `object_value`.
///
/// The examples on each variant show the source syntax that yields it.
#[derive(Debug, PartialEq)]
pub enum Action {
    /// An object wrapped in square brackets: `selector | [{foo: name }]`.
    /// Maps each key to the action parsed for its value.
    ForEachChild(HashMap<String, Box<Action>>),
    /// A single value wrapped in square brackets: `selector | [.name]`.
    ForEachChildArray(Box<Action>),
    /// A bare object: `selector | {foo: name }`.
    /// Maps each key to the action parsed for its value.
    Child(HashMap<String, Box<Action>>),
    /// An expression, optionally piped into a follow-up action: `.foo > bar | ...`.
    /// The second field is `None` when nothing follows the expression.
    Expression(Expression, Option<Box<Action>>),
}
47
48fn ws<'a, O, E: ParseError<&'a str>, F: Parser<&'a str, O, E>>(f: F) -> impl Parser<&'a str, O, E> {
49    delimited(multispace0, f, multispace0)
50}
51
52fn object_key(i: &str) -> IResult<&str, &str, VerboseError<&str>> {
53    terminated(alphanumeric1, char(':'))(i)
54}
55
56fn object_key_suffix(i: &str) -> IResult<&str, &str, VerboseError<&str>> {
57    preceded(ws(tag(",")), terminated(alphanumeric1, char(':')))(i)
58}
59
60fn expression_rhs(i: &str) -> IResult<&str, Expression, VerboseError<&str>> {
61    // This code is smelly. The issue here is parsing this expression:
62    // {a: .foo, b: .foo | .bar}
63    //     ^ here
64    // We need to work out what bit to parse next. `, b: .foo` _could_ be part of
65    // our selector? i.e `(.foo, b: .foo) | .bar`
66    // But obviously that's not what we want. So here, we take until we find a `,`, `}` while
67    // `object_key_suffix` does not match.
68    let (_, (matches, _)): (_, (Vec<&str>, _)) = many_till(
69        ws(take_while(|c: char| c != ',' && c != '}')),
70        alt((ws(object_key_suffix), ws(tag("}")))),
71    )(i)?;
72    let rhs = match matches[..] {
73        [first] if first.contains('|') => {
74            let (rhs, _) = first.split_once('|').unwrap();
75            rhs
76        }
77        _ => {
78            fail::<_, &str, _>(i)?;
79            unreachable!()
80        }
81    };
82    let (_, expression) = expression(rhs)?;
83    let new_rest = &i[rhs.len()..];
84    Ok((new_rest, expression))
85}
86
87fn selector(i: &str) -> IResult<&str, (Selector, &str), VerboseError<&str>> {
88    let (rest, value) = take_while(|c| !matches!(c, ',' | '}' | ']'))(i)?;
89    match Selector::parse(value) {
90        Ok(v) => Ok((rest, (v, value))),
91        Err(_) => {
92            fail::<_, &str, _>(i)?;
93            unreachable!()
94        }
95    }
96}
97
98fn expression(i: &str) -> IResult<&str, Expression, VerboseError<&str>> {
99    alt((
100        map(ws(tag("@parent")), |_| Expression::Parent),
101        map(
102            delimited(ws(tag("@(")), take_while(|c: char| c != ')'), ws(tag(")"))),
103            |v: &str| Expression::Attribute(v.to_string()),
104        ),
105        map(preceded(ws(tag("@")), tag("text")), |_: &str| {
106            Expression::Text
107        }),
108        map(
109            delimited(
110                tag("@sibling("),
111                ws(take_while1(|c: char| c.is_ascii_digit())),
112                tag(")"),
113            ),
114            |v: &str| Expression::Sibling(v.parse::<usize>().unwrap()),
115        ),
116        map(selector, |(sel, val)| Expression::Selector(sel, val.into())),
117    ))(i)
118}
119
120fn object_value(i: &str) -> IResult<&str, Action, VerboseError<&str>> {
121    alt((
122        map(object, |v| {
123            Action::Child(v.into_iter().map(|(k, v)| (k.into(), v.into())).collect())
124        }),
125        map(delimited(ws(char('[')), object, ws(char(']'))), |v| {
126            Action::ForEachChild(v.into_iter().map(|(k, v)| (k.into(), v.into())).collect())
127        }),
128        map(delimited(ws(char('[')), object_value, ws(char(']'))), |v| {
129            Action::ForEachChildArray(v.into())
130        }),
131        map(
132            separated_pair(expression_rhs, ws(char('|')), object_value),
133            |v: (Expression, Action)| Action::Expression(v.0, Some(v.1.into())),
134        ),
135        map(expression, |v: Expression| Action::Expression(v, None)),
136    ))(i)
137}
138
139pub fn object(input: &str) -> IResult<&str, HashMap<&str, Action>, VerboseError<&str>> {
140    map(
141        delimited(
142            char('{'),
143            ws(separated_list0(
144                ws(char(',')),
145                pair(object_key, ws(object_value)),
146            )),
147            char('}'),
148        ),
149        |key_values| key_values.into_iter().collect(),
150    )(input)
151}
152
/// Renders a parse error as a `String` by delegating to nom's `convert_error`.
///
/// `input` is passed through to `convert_error` together with `error`.
/// NOTE(review): presumably `input` must be the same string that was handed to
/// the parser that produced `error`; confirm against nom's documentation.
pub fn format_error(input: &str, error: VerboseError<&str>) -> String {
    convert_error(input, error)
}
156
// A bare `@(name)` value parses to an attribute expression with no follow-up.
#[test]
fn test_attribute() {
    let attribute = Action::Expression(Expression::Attribute("abc".into()), None);
    let expected: HashMap<&str, Action> = HashMap::from([("foo", attribute)]);

    assert_eq!(object("{foo: @(abc)}"), Ok(("", expected)));
}
168
// `selector | @(name)` parses to a selector expression piped into an attribute.
#[test]
fn test_nested_attribute() {
    let attribute = Action::Expression(Expression::Attribute("abc".into()), None);
    let head = Expression::Selector(Selector::parse(".abc").unwrap(), ".abc ".into());
    let expected: HashMap<&str, Action> =
        HashMap::from([("foo", Action::Expression(head, Some(Box::new(attribute))))]);

    assert_eq!(object("{foo: .abc | @(abc)}"), Ok(("", expected)));
}
183
// `selector | [{key: value}]` parses to a selector piped into a for-each-child object.
#[test]
fn test_nested() {
    let attribute = Action::Expression(Expression::Attribute("abc".into()), None);
    let for_each = Action::ForEachChild(HashMap::from([(
        "baz".to_string(),
        Box::new(attribute),
    )]));
    let head = Expression::Selector(Selector::parse(".bar").unwrap(), ".bar ".into());
    let expected: HashMap<&str, Action> =
        HashMap::from([("foo", Action::Expression(head, Some(Box::new(for_each))))]);

    assert_eq!(object("{foo: .bar | [{baz: @(abc)}]}"), Ok(("", expected)));
}
210
// `selector | [@(name)]` parses to a selector piped into a for-each-child array.
#[test]
fn test_array_attribute() {
    let attribute = Action::Expression(Expression::Attribute("abc".into()), None);
    let for_each = Action::ForEachChildArray(Box::new(attribute));
    let head = Expression::Selector(Selector::parse(".bar").unwrap(), ".bar ".into());
    let expected: HashMap<&str, Action> =
        HashMap::from([("foo", Action::Expression(head, Some(Box::new(for_each))))]);

    assert_eq!(object("{foo: .bar | [@(abc)]}"), Ok(("", expected)));
}
231
// A pipeline inside an array: `selector | [selector | selector]`.
#[test]
fn test_array_nested() {
    let innermost = Action::Expression(
        Expression::Selector(Selector::parse("bar").unwrap(), "bar".into()),
        None,
    );
    let element = Action::Expression(
        Expression::Selector(Selector::parse(".lol ").unwrap(), ".lol ".into()),
        Some(Box::new(innermost)),
    );
    let for_each = Action::ForEachChildArray(Box::new(element));
    let head = Expression::Selector(Selector::parse(".bar").unwrap(), ".bar ".into());
    let expected: HashMap<&str, Action> =
        HashMap::from([("foo", Action::Expression(head, Some(Box::new(for_each))))]);

    assert_eq!(object("{foo: .bar | [.lol | bar]}"), Ok(("", expected)));
}