mongodb_language_model/
lib.rs

1//! `mongodb-language-model` is a library for parsing the MongoDB language and
2//! returning an abstract syntax tree using pest.rs.
3//!
4//! # Example
5//!
6//! ```rust
7//! use mongodb_language_model::*;
8//!
9//! let input = r#"{ "$or": [ { "status": "A" }, { "qty": { "$lt": 30 } }] }"#;
10//! let ast = parse(input).unwrap();
11//! ```
12
13extern crate pest;
14#[macro_use]
15extern crate pest_derive;
16
17use pest::{error::Error, iterators::Pair, Parser};
18
/// Pest parser whose rules (the `Rule` enum) are generated at compile time
/// from the grammar file `mongodb.pest`.
#[derive(Parser)]
#[grammar = "mongodb.pest"]
pub struct MongoDbParser;
22
/// Root node of the AST returned by `parse`: the clauses collected from one
/// `clause_list` of the grammar. Expressions also appear nested inside
/// [`ExpressionTreeClause`].
#[derive(Debug, Clone, PartialEq)]
pub struct Expression {
    /// Clauses in the order the parser yields them.
    pub clauses: Vec<Clause>,
}
27
/// One entry of an [`Expression`].
#[derive(Debug, Clone, PartialEq)]
pub enum Clause {
    /// Built from a `leaf_clause` pair: a key together with its value.
    Leaf(LeafClause),
    /// Built from an `expression_tree_clause` pair: an operator applied to a
    /// list of sub-expressions (e.g. `$or` in the crate-level example).
    ExpressionTree(ExpressionTreeClause),
}
33
/// A `key: value` clause.
#[derive(Debug, Clone, PartialEq)]
pub struct LeafClause {
    /// The key, decoded from its quoted source text with `serde_json`
    /// (so the surrounding quotes are not part of the string).
    pub key: String,
    /// Either a literal value or a list of operators applied to the key.
    pub value: Value,
}
39
/// An operator clause whose operand is a list of expressions.
#[derive(Debug, Clone, PartialEq)]
pub struct ExpressionTreeClause {
    /// Operator text as it appears between the quotation marks (e.g. `$or`).
    pub operator: String,
    /// The sub-expressions, one per element of the operand list.
    pub expressions: Vec<Expression>,
}
45
/// The value side of a [`LeafClause`].
#[derive(Debug, Clone, PartialEq)]
pub enum Value {
    /// Built from a `leaf_value` pair: a literal JSON value.
    Leaf(LeafValue),
    /// Built from an `operator_expression` pair: the operators it lists.
    Operators(Vec<Operator>),
}
51
/// A literal value, stored as a `serde_json::Value`.
#[derive(Debug, Clone, PartialEq)]
pub struct LeafValue {
    /// The decoded literal. Objects whose first key is `$f` or
    /// `$numberDecimal` are stored as the value under that key rather than
    /// as the wrapping object.
    pub value: serde_json::Value,
}
56
// FIXME this can be different operator types:
//       value_operator_type, list_operator_type, elemmatch_expression_operator_type,
//       operator_expression_operator_type and more special cases not yet handled
/// One operator inside a [`Value::Operators`] list. Only the three variants
/// below are produced by the parser; the commented-out ones are not yet
/// implemented.
#[derive(Debug, Clone, PartialEq)]
pub enum Operator {
    // ElemMatch(ElemMatchOperator),
    // ElemMatchOperatorObject(ElemMatchOperatorObjectOperator),
    /// Built from a `list_operator_type` pair (e.g. `$in` in the tests).
    List(ListOperator),
    /// Built from a `value_operator_type` pair (e.g. `$lt` in the tests).
    Value(ValueOperator),
    /// Built from an `operator_expression_operator_type` pair
    /// (e.g. `$not` in the tests).
    ExpressionOperator(OperatorExpressionOperator),
}
68
/// An operator whose operand is a list of literal values.
#[derive(Debug, Clone, PartialEq)]
pub struct ListOperator {
    /// Operator text as it appears between the quotation marks (e.g. `$in`).
    pub operator: String,
    /// The literal values of the operand list, in source order.
    pub values: Vec<LeafValue>,
}
74
/// An operator whose operand is a single literal value.
#[derive(Debug, Clone, PartialEq)]
pub struct ValueOperator {
    /// Operator text as it appears between the quotation marks (e.g. `$lt`).
    pub operator: String,
    /// The literal operand.
    pub value: LeafValue,
}
80
/// An operator whose operand is itself an operator expression.
#[derive(Debug, Clone, PartialEq)]
pub struct OperatorExpressionOperator {
    /// Operator text as it appears between the quotation marks (e.g. `$not`).
    pub operator: String,
    /// The operators of the nested operator expression.
    pub operators: Vec<Operator>,
}
86
87pub fn parse(query: &str) -> Result<Expression, Error<Rule>> {
88    let pair = MongoDbParser::parse(Rule::query, query)?.next().unwrap();
89
90    fn parse_query(query: Pair<Rule>) -> Expression {
91        let expression = query.into_inner().next().unwrap();
92        parse_expression(expression)
93    }
94
95    fn parse_expression(expression: Pair<Rule>) -> Expression {
96        let clause_list = expression.clone().into_inner().next().unwrap();
97        match clause_list.as_rule() {
98            Rule::clause_list => Expression {
99                clauses: parse_clause_list(clause_list),
100            },
101            t => unreachable!("parse_expression: {:?}\ngot: {:?}", t, expression),
102        }
103    }
104
105    fn parse_clause_list(clause: Pair<Rule>) -> Vec<Clause> {
106        clause.into_inner().map(|pair| parse_clause(pair)).collect()
107    }
108
109    fn parse_clause(outer_clause: Pair<Rule>) -> Clause {
110        let clause = outer_clause.clone().into_inner().next().unwrap();
111
112        match clause.as_rule() {
113            Rule::leaf_clause => {
114                let mut inner = clause.into_inner();
115                let key = inner.next().unwrap().as_str();
116                let value = parse_value(inner.next().unwrap());
117                Clause::Leaf(LeafClause {
118                    key: serde_json::from_str(key).unwrap(),
119                    value,
120                })
121            }
122            Rule::expression_tree_clause => {
123                let mut inner = clause.into_inner();
124                inner.next(); // quotation_mark
125                let operator = inner.next().unwrap().as_str();
126                inner.next(); // quotation_mark
127                let expression_list = inner.next().unwrap();
128                let expressions: Vec<Expression> = expression_list
129                    .into_inner()
130                    .map(|pair| parse_expression(pair))
131                    .collect();
132                Clause::ExpressionTree(ExpressionTreeClause {
133                    operator: operator.to_string(),
134                    expressions, // TODO parse_expression_tree(inner.next().unwrap()),
135                })
136            }
137            t => unreachable!("parse_clause: {:?}\nGot: {:?}", t, outer_clause),
138        }
139    }
140
141    fn parse_value(outer_value: Pair<Rule>) -> Value {
142        let value = outer_value.into_inner().next().unwrap();
143        parse_value_inner(value)
144    }
145
146    fn parse_value_inner(value: Pair<Rule>) -> Value {
147        match value.as_rule() {
148            Rule::leaf_value => Value::Leaf(parse_leaf_value(value)),
149            Rule::operator_expression => Value::Operators(parse_operator_expression(value)),
150            t => unreachable!("parse_value: {:?}\nGot: {:?}", t, value),
151        }
152    }
153
154    fn parse_leaf_value(value: Pair<Rule>) -> LeafValue {
155        let inner = value.clone().into_inner().next().unwrap();
156        match inner.as_rule() {
157            Rule::string => LeafValue {
158                value: serde_json::from_str(inner.as_str()).unwrap(),
159            },
160            Rule::number => LeafValue {
161                value: serde_json::from_str(inner.as_str()).unwrap(),
162            },
163            Rule::object => LeafValue {
164                value: parse_value_object(inner),
165            },
166            Rule::false_lit => LeafValue {
167                value: serde_json::json!(false),
168            },
169            Rule::true_lit => LeafValue {
170                value: serde_json::json!(true),
171            },
172            Rule::null => LeafValue {
173                value: serde_json::json!(null),
174            },
175            t => unreachable!("parse_leaf_value: {:?}\nGot: {:?}", t, inner),
176        }
177    }
178
179    fn parse_value_object(value: Pair<Rule>) -> serde_json::Value {
180        let json: serde_json::Value = serde_json::from_str(value.as_str()).unwrap();
181        let key = json.as_object().unwrap().keys().next().unwrap().as_str();
182        match key {
183            "$f" | "$numberDecimal" => json.get(key).unwrap().clone(),
184            _ => json,
185        }
186    }
187
188    fn parse_operator_expression(operator_expression: Pair<Rule>) -> Vec<Operator> {
189        let inner = operator_expression.clone().into_inner().next().unwrap();
190
191        match inner.as_rule() {
192            Rule::operator_list => parse_operator_list(inner),
193            t => unreachable!("parse_operator_expression: {:?}\nGot: {:?}", t, inner),
194        }
195    }
196
197    fn parse_operator_list(operator_list: Pair<Rule>) -> Vec<Operator> {
198        operator_list
199            .into_inner()
200            .map(|pair| parse_operator(pair))
201            .collect()
202    }
203
204    fn parse_operator(operator: Pair<Rule>) -> Operator {
205        let operator_type = operator.clone().into_inner().next().unwrap();
206        match operator_type.as_rule() {
207            Rule::list_operator_type => Operator::List(parse_list_operator_type(operator_type)),
208            Rule::value_operator_type => Operator::Value(parse_value_operator_type(operator_type)),
209            Rule::operator_expression_operator_type => {
210                Operator::ExpressionOperator(parse_operator_expression_operator_type(operator_type))
211            }
212            t => unreachable!("parse_operator: {:?}\nGot: {:?}", t, operator_type),
213        }
214    }
215
216    fn parse_operator_expression_operator_type(
217        operator_type: Pair<Rule>,
218    ) -> OperatorExpressionOperator {
219        let mut inner = operator_type.into_inner();
220        inner.next(); // quotation_mark
221        let operator = inner.next().unwrap().as_str();
222        inner.next(); // quotation_mark
223        let operator_expression = inner.next().unwrap();
224        let operators = parse_operator_expression(operator_expression);
225        OperatorExpressionOperator {
226            operator: operator.to_string(),
227            operators,
228        }
229    }
230
231    fn parse_list_operator_type(operator_type: Pair<Rule>) -> ListOperator {
232        let mut inner = operator_type.into_inner();
233        inner.next();
234        let operator = inner.next().unwrap();
235        inner.next();
236        let leaf_value_list = inner.next().unwrap();
237
238        ListOperator {
239            operator: operator.as_str().to_string(),
240            values: parse_leaf_value_list(leaf_value_list),
241        }
242    }
243
244    fn parse_value_operator_type(operator_type: Pair<Rule>) -> ValueOperator {
245        let mut inner = operator_type.into_inner();
246        inner.next(); // quotation_mark
247        let operator = inner.next().unwrap().as_str();
248        inner.next(); // quotation_mark
249        let leaf_value = inner.next().unwrap();
250
251        ValueOperator {
252            operator: operator.to_string(),
253            value: parse_leaf_value(leaf_value),
254        }
255    }
256
257    fn parse_leaf_value_list(leaf_value_list: Pair<Rule>) -> Vec<LeafValue> {
258        leaf_value_list
259            .into_inner()
260            .map(|pair| parse_leaf_value(pair))
261            .collect()
262    }
263
264    Ok(parse_query(pair))
265}
266
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Wraps `value` in a leaf clause stored under `key`.
    fn leaf_clause(key: &str, value: Value) -> Clause {
        Clause::Leaf(LeafClause {
            key: key.to_string(),
            value,
        })
    }

    /// Expected AST for a query consisting of one `key: <literal>` clause.
    fn single_leaf(key: &str, value: serde_json::Value) -> Expression {
        Expression {
            clauses: vec![leaf_clause(key, Value::Leaf(LeafValue { value }))],
        }
    }

    /// Expected AST node for `{"<operator>": <literal>}`.
    fn value_operator(operator: &str, value: serde_json::Value) -> Operator {
        Operator::Value(ValueOperator {
            operator: operator.to_string(),
            value: LeafValue { value },
        })
    }

    /// Asserts that `input` is accepted when parsed starting at `rule`.
    fn assert_rule_accepts(rule: Rule, input: &str) {
        assert!(
            MongoDbParser::parse(rule, input).is_ok(),
            "rule {:?} rejected {:?}",
            rule,
            input
        );
    }

    #[test]
    fn test_parse_simple() {
        assert_eq!(
            parse(r#"{"status": "1"}"#).unwrap(),
            single_leaf("status", json!("1"))
        );
    }

    #[test]
    fn test_parse_true_bool() {
        assert_eq!(
            parse(r#"{"status": true}"#).unwrap(),
            single_leaf("status", json!(true))
        );
    }

    #[test]
    fn test_parse_false_bool() {
        assert_eq!(
            parse(r#"{"status": false}"#).unwrap(),
            single_leaf("status", json!(false))
        );
    }

    #[test]
    fn test_parse_null() {
        assert_eq!(
            parse(r#"{"status": null}"#).unwrap(),
            single_leaf("status", json!(null))
        );
    }

    #[test]
    fn test_parse_simple_with_extended_double() {
        assert_eq!(
            parse(r#"{"x":{"$f":1.2}}"#).unwrap(),
            single_leaf("x", json!(1.2))
        );
    }

    #[test]
    fn test_parse_simple_with_alt_extended_double() {
        assert_eq!(
            parse(r#"{"status": { "$numberDecimal": 1.2 }}"#).unwrap(),
            single_leaf("status", json!(1.2))
        );
    }

    #[test]
    fn test_parse_simple_with_double() {
        assert_eq!(
            parse(r#"{"status": 1.2}"#).unwrap(),
            single_leaf("status", json!(1.2))
        );
    }

    // FIXME support regex. Once supported, this test should parse
    //   {"status":"A","$or":[{"qty":{"$lt":30}},{"item":{"$regex":"/^p/"}}]}
    // and assert on the resulting AST. It is ignored rather than left as an
    // empty passing test so the gap stays visible in the test report.
    #[test]
    #[ignore = "regex values are not supported yet"]
    fn test_parse_with_regex() {}

    #[test]
    fn test_parse_with_or() {
        let expected = Expression {
            clauses: vec![Clause::ExpressionTree(ExpressionTreeClause {
                operator: "$or".to_string(),
                expressions: vec![
                    single_leaf("status", json!("A")),
                    Expression {
                        clauses: vec![leaf_clause(
                            "qty",
                            Value::Operators(vec![value_operator("$lt", json!(30))]),
                        )],
                    },
                ],
            })],
        };
        assert_eq!(
            parse(r#"{"$or":[{"status":"A"},{"qty":{"$lt":30}}]}"#).unwrap(),
            expected
        );
    }

    #[test]
    fn test_parse_with_list_operator() {
        let expected = Expression {
            clauses: vec![
                leaf_clause(
                    "status",
                    Value::Operators(vec![Operator::List(ListOperator {
                        operator: "$in".to_string(),
                        values: vec![
                            LeafValue { value: json!("A") },
                            LeafValue { value: json!("D") },
                        ],
                    })]),
                ),
                leaf_clause("x", Value::Leaf(LeafValue { value: json!(2) })),
            ],
        };
        assert_eq!(
            parse(r#"{"status":{"$in":["A","D"]},"x":2}"#).unwrap(),
            expected
        );
    }

    #[test]
    fn test_parse_simple_with_not() {
        let expected = Expression {
            clauses: vec![leaf_clause(
                "age",
                Value::Operators(vec![Operator::ExpressionOperator(
                    OperatorExpressionOperator {
                        operator: "$not".to_string(),
                        operators: vec![value_operator("$gt", json!(12))],
                    },
                )]),
            )],
        };
        assert_eq!(parse(r#"{"age":{"$not":{"$gt":12}}}"#).unwrap(), expected);
    }

    #[test]
    fn test_object_null() {
        assert_rule_accepts(Rule::object, r#"{"status": null}"#);
    }

    #[test]
    fn test_object_string() {
        assert_rule_accepts(Rule::object, r#"{"status": "some"}"#);
    }

    #[test]
    fn test_member_null() {
        assert_rule_accepts(Rule::member, r#""status": null"#);
    }

    #[test]
    fn test_member_false() {
        assert_rule_accepts(Rule::member, r#""status": false"#);
    }

    #[test]
    fn test_member_true() {
        assert_rule_accepts(Rule::member, r#""status": true"#);
    }

    #[test]
    fn test_member_string() {
        assert_rule_accepts(Rule::member, r#""status": "true""#);
    }

    #[test]
    fn test_member_decimal_number() {
        assert_rule_accepts(Rule::member, r#""status": 1.2"#);
    }

    #[test]
    fn test_member_explicit_decimal_number() {
        assert_rule_accepts(Rule::member, r#""status": { "$numberDecimal": 1.2 }"#);
    }

    #[test]
    fn test_member_explicit_alt_decimal_number() {
        assert_rule_accepts(Rule::member, r#""status": { "$f": 1.2 }"#);
    }
}