gq_core/
parser.rs

1use crate::lexer::{self, Token};
2use crate::query::query_arguments::{
3    QueryArgument, QueryArgumentOperation, QueryArgumentValue, QueryArguments,
4};
5use crate::query::query_key::{AtomicQueryKey, QueryKey, RawKey};
6use crate::query::{ChildQuery, ChildQueryBuilder, Query, QueryBuilder};
7use logos::{Logos, Span, SpannedIter};
8use regex::Regex;
9use std::iter::Peekable;
10use std::str::FromStr;
11use thiserror::Error;
12
13pub type Result<T> = std::result::Result<T, Error>;
14type SpannedToken = (Token, Span);
15type SpannedTokenRef<'a> = (&'a Token, Span);
16
17#[derive(Error, Debug)]
18pub enum Error {
19    // TODO: Group parser errors inside a ParserError enum?
20    #[error("Unexpected token '{0}'")]
21    UnexpectedToken(Token, Span),
22    #[error("Unexpected end of input")]
23    UnexpectedEndOfInput(Span),
24    #[error("Unexpected token after root query")]
25    UnexpectedTokenAfterRootQuery(Span),
26    #[error("Lexer Error: {0}")]
27    Lexer(lexer::Error, Span),
28    #[error("Query construction error: {0}")]
29    Construction(crate::query::Error, Span),
30    #[error("Regex parsing error: {0}")]
31    Regex(regex::Error, Span),
32}
33
34impl Error {
35    pub fn span(&self) -> &Span {
36        match self {
37            Self::UnexpectedToken(_, span) => span,
38            Self::UnexpectedEndOfInput(span) => span,
39            Self::UnexpectedTokenAfterRootQuery(span) => span,
40            Self::Lexer(_, span) => span,
41            Self::Construction(_, span) => span,
42            Self::Regex(_, span) => span,
43        }
44    }
45}
46
47pub struct Parser<'src> {
48    lexer: Peekable<SpannedIter<'src, Token>>,
49    source: &'src str,
50}
51
52impl<'src> Parser<'src> {
53    pub fn new(source: &'src str) -> Self {
54        Self {
55            lexer: Token::lexer(source).spanned().peekable(),
56            source,
57        }
58    }
59
60    pub fn parse(&mut self) -> Result<Query> {
61        let query = self.parse_root_query()?;
62
63        if self.lexer.next().is_some() {
64            // TODO: use this error or the generic one?
65            return Err(Error::UnexpectedTokenAfterRootQuery(self.last_span()));
66        }
67
68        Ok(query)
69    }
70
71    fn last_span(&self) -> Span {
72        self.source.len()..self.source.len()
73    }
74
75    fn current_span(&mut self) -> Result<Span> {
76        self.peek().map(|(_, span)| span)
77    }
78
79    fn span_between(start: Span, end: Span) -> Span {
80        start.start..end.end
81    }
82
83    fn peek(&mut self) -> Result<SpannedTokenRef<'_>> {
84        match self.lexer.peek() {
85            Some((token, span)) => {
86                let token = token
87                    .as_ref()
88                    .map_err(|err| Error::Lexer(err.clone(), span.clone()))?;
89                Ok((token, span.clone()))
90            }
91            None => Ok((&Token::EOF, self.source.len()..self.source.len())),
92        }
93    }
94
95    fn consume(&mut self) -> Result<Span> {
96        self.next_token().map(|(_, span)| span)
97    }
98
99    fn next_token(&mut self) -> Result<SpannedToken> {
100        let spanned_token = self
101            .lexer
102            .next()
103            // TODO: return this error or just output the EOF token as the peek fn does?
104            .ok_or_else(|| Error::UnexpectedEndOfInput(self.last_span()))?;
105
106        let (token, span) = spanned_token;
107        let token = token.map_err(|err| Error::Lexer(err, span.clone()))?;
108        Ok((token, span))
109    }
110
111    /// # Grammar
112    /// `S -> QUERY_ARGUMENTS ROOT_QUERY_KEY | QUERY_ARGUMENTS ROOT_QUERY_KEY { QUERY_CONTENT }`
113    fn parse_root_query(&mut self) -> Result<Query> {
114        let root_span_start = self.current_span()?;
115        let arguments = self.parse_query_arguments()?;
116        let root_query_key = self.parse_root_query_key()?;
117
118        match self.peek()? {
119            (Token::LBrace, _) => {
120                self.consume()?;
121                let children = self.parse_query_content(&Token::RBrace)?;
122                let root_span_end = self.consume()?;
123                let root_span = Self::span_between(root_span_start, root_span_end);
124
125                QueryBuilder::default()
126                    .arguments(arguments)
127                    .children(children)
128                    .key(root_query_key)
129                    .build()
130                    .map_err(|err| Error::Construction(err.into(), root_span))
131            }
132            (_, root_span_end) => {
133                let root_span = Self::span_between(root_span_start, root_span_end);
134                QueryBuilder::default()
135                    .arguments(arguments)
136                    .key(root_query_key)
137                    .build()
138                    .map_err(|err| Error::Construction(err.into(), root_span))
139            }
140        }
141    }
142
143    /// # Grammar
144    /// `QUERY_CONTENT -> QUERY QUERY_CONTENT | ε`
145    fn parse_query_content(&mut self, stop_token: &Token) -> Result<Vec<ChildQuery>> {
146        let mut queries = Vec::new();
147
148        loop {
149            match self.peek()? {
150                (token, _) if token == stop_token => return Ok(queries),
151                _ => {
152                    let query = self.parse_query()?;
153                    queries.push(query);
154                }
155            }
156        }
157    }
158
159    /// # Grammar
160    /// `QUERY -> QUERY_KEY QUERY_ALIAS | QUERY_KEY QUERY_ALIAS { QUERY_CONTENT }
161    fn parse_query(&mut self) -> Result<ChildQuery> {
162        let query_span_start = self.current_span()?;
163        let query_key = self.parse_query_key()?;
164        let query_alias = self.parse_query_alias()?;
165
166        match self.peek()? {
167            (Token::LBrace, _) => {
168                self.consume()?;
169                let children = self.parse_query_content(&Token::RBrace)?;
170                let query_span_end = self.consume()?;
171                let query_span = Self::span_between(query_span_start, query_span_end);
172
173                ChildQueryBuilder::default()
174                    .key(query_key)
175                    .alias(query_alias)
176                    .children(children)
177                    .build()
178                    .map_err(|err| Error::Construction(err.into(), query_span))
179            }
180            (_, query_span_end) => {
181                let query_span = Self::span_between(query_span_start, query_span_end);
182                ChildQueryBuilder::default()
183                    .key(query_key)
184                    .alias(query_alias)
185                    .build()
186                    // TODO: We should take the end span from the query alias function
187                    .map_err(|err| Error::Construction(err.into(), query_span))
188            }
189        }
190    }
191
192    /// # Grammar
193    /// `ROOT_QUERY_KEY -> QUERY_KEY | ε`
194    fn parse_root_query_key(&mut self) -> Result<QueryKey> {
195        match self.peek()? {
196            // We have to know what comes next due to the epsilon rule
197            (Token::Identifier(_), _) | (Token::String(_), _) => self.parse_query_key(),
198            _ => Ok(Default::default()),
199        }
200    }
201
202    /// # Grammar
203    /// `QUERY_KEY -> ATOMIC_QUERY_KEY . QUERY_KEY | ATOMIC_QUERY_KEY`
204    fn parse_query_key(&mut self) -> Result<QueryKey> {
205        let mut keys = Vec::new();
206        loop {
207            let atomic_query_key = self.parse_atomic_query_key()?;
208            keys.push(atomic_query_key);
209            match self.peek()? {
210                (Token::Dot, _) => {
211                    self.consume()?;
212                }
213                _ => return Ok(QueryKey::new(keys)),
214            }
215        }
216    }
217
218    /// # Grammar
219    /// `ATOMIC_QUERY_KEY -> RAW_KEY QUERY_ARGUMENTS`
220    fn parse_atomic_query_key(&mut self) -> Result<AtomicQueryKey> {
221        let raw_key = self.parse_raw_key()?;
222        let arguments = self.parse_query_arguments()?;
223        Ok(AtomicQueryKey::new(raw_key, arguments))
224    }
225
226    /// # Grammar
227    /// `RAW_KEY -> key | string`
228    fn parse_raw_key(&mut self) -> Result<RawKey> {
229        match self.next_token()? {
230            (Token::Identifier(key), _) => Ok(RawKey::Identifier(key)),
231            (Token::String(key), _) => Ok(RawKey::String(key)),
232            (unexpected_token, span) => Err(Error::UnexpectedToken(unexpected_token, span)),
233        }
234    }
235
236    /// # Grammar
237    /// `QUERY_ALIAS -> : RAW_KEY | ε`
238    fn parse_query_alias(&mut self) -> Result<Option<RawKey>> {
239        match self.peek()? {
240            (Token::Colon, _) => {
241                self.consume()?;
242                self.parse_raw_key().map(Some)
243            }
244            _ => Ok(None),
245        }
246    }
247
248    /// # Grammar
249    /// `QUERY_ARGUMENTS -> ( QUERY_ARGUMENTS_CONTENT ) | ε`
250    fn parse_query_arguments(&mut self) -> Result<QueryArguments> {
251        match self.peek()? {
252            (Token::LParen, _) => {
253                self.consume()?;
254                let arguments = QueryArguments::new(self.parse_query_arguments_content()?);
255                match self.next_token()? {
256                    (Token::RParen, _) => Ok(arguments),
257                    (unexpected_token, span) => Err(Error::UnexpectedToken(unexpected_token, span)),
258                }
259            }
260            _ => Ok(Default::default()),
261        }
262    }
263
264    /// # Grammar
265    /// `QUERY_ARGUMENTS_CONTENT -> QUERY_ARGUMENT , QUERY_ARGUMENTS_CONTENT | QUERY_ARGUMENT`
266    fn parse_query_arguments_content(&mut self) -> Result<Vec<QueryArgument>> {
267        let mut arguments = Vec::new();
268
269        loop {
270            let argument = self.parse_query_argument()?;
271            arguments.push(argument);
272
273            match self.peek()? {
274                (Token::Comma, _) => {
275                    self.consume()?;
276                }
277                _ => return Ok(arguments),
278            }
279        }
280    }
281
282    /// # Grammar
283    /// `QUERY_ARGUMENT -> QUERY_KEY QUERY_AGUMENT_OPERATION`
284    fn parse_query_argument(&mut self) -> Result<QueryArgument> {
285        let key = self.parse_query_key()?;
286        let operation = self.parse_query_argument_operation()?;
287        Ok(QueryArgument::new(key, operation))
288    }
289
290    /// # Grammar
291    /// `QUERY_AGUMENT_OPERATION -> = QUERY_ARGUMENT_VALUE | != QUERY_ARGUMENT_VALUE
292    ///     | > NUMBER | >= NUMBER
293    ///     | < NUMBER | <= NUMBER
294    ///     | ~ REGEX | !~ REGEX`
295    fn parse_query_argument_operation(&mut self) -> Result<QueryArgumentOperation> {
296        match self.next_token()? {
297            (Token::Equal, _) => Ok(QueryArgumentOperation::Equal(
298                self.parse_query_argument_value()?,
299            )),
300            (Token::NotEqual, _) => Ok(QueryArgumentOperation::NotEqual(
301                self.parse_query_argument_value()?,
302            )),
303            (Token::Greater, _) => Ok(QueryArgumentOperation::Greater(self.parse_number()?)),
304            (Token::GreaterEqual, _) => {
305                Ok(QueryArgumentOperation::GreaterEqual(self.parse_number()?))
306            }
307            (Token::Less, _) => Ok(QueryArgumentOperation::Less(self.parse_number()?)),
308            (Token::LessEqual, _) => Ok(QueryArgumentOperation::LessEqual(self.parse_number()?)),
309            (Token::Tilde, _) => Ok(QueryArgumentOperation::Match(self.parse_regex()?)),
310            (Token::NotTilde, _) => Ok(QueryArgumentOperation::NotMatch(self.parse_regex()?)),
311            (unexpected_token, span) => Err(Error::UnexpectedToken(unexpected_token, span)),
312        }
313    }
314
315    /// # Grammar
316    /// `QUERY_ARGUMENT_VALUE -> string | number | boolean | null`
317    fn parse_query_argument_value(&mut self) -> Result<QueryArgumentValue> {
318        match self.next_token()? {
319            (Token::String(value), _) => Ok(QueryArgumentValue::String(value)),
320            (Token::Number(value), _) => Ok(QueryArgumentValue::Number(value)),
321            (Token::Bool(value), _) => Ok(QueryArgumentValue::Bool(value)),
322            (Token::Null, _) => Ok(QueryArgumentValue::Null),
323            (unexpected_token, span) => Err(Error::UnexpectedToken(unexpected_token, span)),
324        }
325    }
326    /// # Grammar
327    /// `NUMBER -> number`
328    fn parse_number(&mut self) -> Result<f64> {
329        match self.next_token()? {
330            (Token::Number(value), _) => Ok(value),
331            (unexpected_token, span) => Err(Error::UnexpectedToken(unexpected_token, span)),
332        }
333    }
334
335    /// # Grammar
336    /// `REGEX -> regex`
337    fn parse_regex(&mut self) -> Result<Regex> {
338        match self.next_token()? {
339            (Token::String(value), span) => {
340                Regex::new(&value).map_err(|err| Error::Regex(err, span))
341            }
342            (unexpected_token, span) => Err(Error::UnexpectedToken(unexpected_token, span)),
343        }
344    }
345}
346
347impl FromStr for Query {
348    type Err = Error;
349
350    fn from_str(s: &str) -> Result<Self> {
351        Parser::new(s).parse()
352    }
353}