// mxr_search/parser.rs

1use crate::ast::*;
2use chrono::{Duration, Local, NaiveDate};
3use thiserror::Error;
4
// -- Tokens -------------------------------------------------------------------
6
/// Lexical tokens produced by `tokenize` and consumed by `Parser`.
///
/// Every payload is a `String`, so the type is fully `Eq` as well as
/// `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Token {
    /// A bare run of characters that is not one of the keywords
    /// `AND` / `OR` / `NOT`.
    Word(String),
    /// The contents of a double-quoted string, without the quotes.
    Phrase(String),
    /// `:` — separates a field name from its value.
    Colon,
    /// `-` at the start of a token — prefix negation.
    Minus,
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// The keyword `AND` (exact, upper-case).
    And,
    /// The keyword `OR` (exact, upper-case).
    Or,
    /// The keyword `NOT` (exact, upper-case).
    Not,
}
19
// -- Errors -------------------------------------------------------------------
21
22#[derive(Debug, Error, PartialEq)]
23pub enum ParseError {
24    #[error("unexpected end of input")]
25    UnexpectedEnd,
26    #[error("unexpected token: {0:?}")]
27    UnexpectedToken(String),
28    #[error("unmatched parenthesis")]
29    UnmatchedParen,
30    #[error("expected value after field")]
31    ExpectedValue,
32    #[error("unknown filter: {0}")]
33    UnknownFilter(String),
34    #[error("invalid size: {0}")]
35    InvalidSize(String),
36    #[error("invalid date: {0}")]
37    InvalidDate(String),
38}
39
// -- Tokenizer ----------------------------------------------------------------
41
42fn tokenize(input: &str) -> Result<Vec<Token>, ParseError> {
43    let mut tokens = Vec::new();
44    let mut chars = input.chars().peekable();
45
46    while let Some(&ch) = chars.peek() {
47        match ch {
48            ' ' | '\t' | '\n' | '\r' => {
49                chars.next();
50            }
51            '(' => {
52                chars.next();
53                tokens.push(Token::LParen);
54            }
55            ')' => {
56                chars.next();
57                tokens.push(Token::RParen);
58            }
59            ':' => {
60                chars.next();
61                tokens.push(Token::Colon);
62            }
63            '-' => {
64                chars.next();
65                tokens.push(Token::Minus);
66            }
67            '"' => {
68                chars.next();
69                let mut s = String::new();
70                loop {
71                    match chars.next() {
72                        Some('"') => break,
73                        Some(c) => s.push(c),
74                        None => break,
75                    }
76                }
77                tokens.push(Token::Phrase(s));
78            }
79            _ => {
80                let mut word = String::new();
81                while let Some(&c) = chars.peek() {
82                    if c.is_whitespace() || c == '(' || c == ')' || c == ':' || c == '"' {
83                        break;
84                    }
85                    word.push(c);
86                    chars.next();
87                }
88                match word.as_str() {
89                    "AND" => tokens.push(Token::And),
90                    "OR" => tokens.push(Token::Or),
91                    "NOT" => tokens.push(Token::Not),
92                    _ => tokens.push(Token::Word(word)),
93                }
94            }
95        }
96    }
97
98    Ok(tokens)
99}
100
// -- Parser -------------------------------------------------------------------
102
103struct Parser {
104    tokens: Vec<Token>,
105    pos: usize,
106}
107
108impl Parser {
109    fn new(tokens: Vec<Token>) -> Self {
110        Self { tokens, pos: 0 }
111    }
112
113    fn peek(&self) -> Option<&Token> {
114        self.tokens.get(self.pos)
115    }
116
117    fn next(&mut self) -> Option<Token> {
118        let tok = self.tokens.get(self.pos).cloned();
119        if tok.is_some() {
120            self.pos += 1;
121        }
122        tok
123    }
124
125    fn at_end(&self) -> bool {
126        self.pos >= self.tokens.len()
127    }
128
129    /// Top-level: parse_expression handles implicit AND between atoms
130    fn parse_expression(&mut self) -> Result<QueryNode, ParseError> {
131        let mut left = self.parse_or()?;
132
133        while !self.at_end() {
134            // Stop if we see a closing paren (handled by caller)
135            if matches!(self.peek(), Some(Token::RParen)) {
136                break;
137            }
138            // Stop if next is OR (handled by parse_or caller)
139            if matches!(self.peek(), Some(Token::Or)) {
140                break;
141            }
142            // Consume optional AND keyword
143            if matches!(self.peek(), Some(Token::And)) {
144                self.next();
145            }
146            if self.at_end() || matches!(self.peek(), Some(Token::RParen | Token::Or)) {
147                break;
148            }
149            let right = self.parse_or()?;
150            left = QueryNode::And(Box::new(left), Box::new(right));
151        }
152
153        Ok(left)
154    }
155
156    fn parse_or(&mut self) -> Result<QueryNode, ParseError> {
157        let mut left = self.parse_unary()?;
158
159        while matches!(self.peek(), Some(Token::Or)) {
160            self.next(); // consume OR
161            let right = self.parse_unary()?;
162            left = QueryNode::Or(Box::new(left), Box::new(right));
163        }
164
165        Ok(left)
166    }
167
168    fn parse_unary(&mut self) -> Result<QueryNode, ParseError> {
169        match self.peek() {
170            Some(Token::Minus) => {
171                self.next();
172                let node = self.parse_atom()?;
173                Ok(QueryNode::Not(Box::new(node)))
174            }
175            Some(Token::Not) => {
176                self.next();
177                let node = self.parse_atom()?;
178                Ok(QueryNode::Not(Box::new(node)))
179            }
180            _ => self.parse_atom(),
181        }
182    }
183
184    fn parse_atom(&mut self) -> Result<QueryNode, ParseError> {
185        match self.peek() {
186            Some(Token::LParen) => {
187                self.next(); // consume (
188                let node = self.parse_expression()?;
189                match self.next() {
190                    Some(Token::RParen) => Ok(node),
191                    _ => Err(ParseError::UnmatchedParen),
192                }
193            }
194            Some(Token::Phrase(s)) => {
195                let s = s.clone();
196                self.next();
197                Ok(QueryNode::Phrase(s))
198            }
199            Some(Token::Word(_)) => {
200                // Check if this is a field:value pattern
201                if self.pos + 1 < self.tokens.len()
202                    && matches!(self.tokens[self.pos + 1], Token::Colon)
203                {
204                    return self.parse_field_value();
205                }
206                let word = match self.next() {
207                    Some(Token::Word(w)) => w,
208                    _ => unreachable!(),
209                };
210                Ok(QueryNode::Text(word))
211            }
212            Some(tok) => Err(ParseError::UnexpectedToken(format!("{:?}", tok))),
213            None => Err(ParseError::UnexpectedEnd),
214        }
215    }
216
217    fn parse_field_value(&mut self) -> Result<QueryNode, ParseError> {
218        let field_name = match self.next() {
219            Some(Token::Word(w)) => w,
220            _ => return Err(ParseError::UnexpectedEnd),
221        };
222
223        // consume colon
224        match self.next() {
225            Some(Token::Colon) => {}
226            _ => return Err(ParseError::ExpectedValue),
227        }
228
229        let value = match self.next() {
230            Some(Token::Word(w)) => w,
231            Some(Token::Phrase(p)) => p,
232            _ => return Err(ParseError::ExpectedValue),
233        };
234
235        match field_name.to_lowercase().as_str() {
236            "from" => Ok(QueryNode::Field {
237                field: QueryField::From,
238                value,
239            }),
240            "to" => Ok(QueryNode::Field {
241                field: QueryField::To,
242                value,
243            }),
244            "cc" => Ok(QueryNode::Field {
245                field: QueryField::Cc,
246                value,
247            }),
248            "bcc" => Ok(QueryNode::Field {
249                field: QueryField::Bcc,
250                value,
251            }),
252            "subject" => Ok(QueryNode::Field {
253                field: QueryField::Subject,
254                value,
255            }),
256            "body" => Ok(QueryNode::Field {
257                field: QueryField::Body,
258                value,
259            }),
260            "filename" => Ok(QueryNode::Field {
261                field: QueryField::Filename,
262                value,
263            }),
264            "label" => Ok(QueryNode::Label(value)),
265            "is" => match value.to_lowercase().as_str() {
266                "unread" => Ok(QueryNode::Filter(FilterKind::Unread)),
267                "read" => Ok(QueryNode::Filter(FilterKind::Read)),
268                "starred" => Ok(QueryNode::Filter(FilterKind::Starred)),
269                "draft" | "drafts" => Ok(QueryNode::Filter(FilterKind::Draft)),
270                "sent" => Ok(QueryNode::Filter(FilterKind::Sent)),
271                "trash" | "deleted" => Ok(QueryNode::Filter(FilterKind::Trash)),
272                "spam" | "junk" => Ok(QueryNode::Filter(FilterKind::Spam)),
273                "answered" | "replied" => Ok(QueryNode::Filter(FilterKind::Answered)),
274                "inbox" => Ok(QueryNode::Filter(FilterKind::Inbox)),
275                "archived" | "archive" => Ok(QueryNode::Filter(FilterKind::Archived)),
276                other => Err(ParseError::UnknownFilter(other.to_string())),
277            },
278            "has" => match value.to_lowercase().as_str() {
279                "attachment" | "attachments" => Ok(QueryNode::Filter(FilterKind::HasAttachment)),
280                other => Err(ParseError::UnknownFilter(other.to_string())),
281            },
282            "size" => {
283                let (op, bytes) = parse_size_value(&value)?;
284                Ok(QueryNode::Size { op, bytes })
285            }
286            "after" => {
287                let date = parse_date_value(&value)?;
288                Ok(QueryNode::DateRange {
289                    bound: DateBound::After,
290                    date,
291                })
292            }
293            "before" => {
294                let date = parse_date_value(&value)?;
295                Ok(QueryNode::DateRange {
296                    bound: DateBound::Before,
297                    date,
298                })
299            }
300            "date" => {
301                let date = parse_date_value(&value)?;
302                Ok(QueryNode::DateRange {
303                    bound: DateBound::Exact,
304                    date,
305                })
306            }
307            "older" => {
308                let date = parse_relative_duration_date(&value)?;
309                Ok(QueryNode::DateRange {
310                    bound: DateBound::Before,
311                    date: DateValue::Specific(date),
312                })
313            }
314            "newer" => {
315                let date = parse_relative_duration_date(&value)?;
316                Ok(QueryNode::DateRange {
317                    bound: DateBound::After,
318                    date: DateValue::Specific(date),
319                })
320            }
321            other => Err(ParseError::UnknownFilter(other.to_string())),
322        }
323    }
324}
325
326fn parse_date_value(s: &str) -> Result<DateValue, ParseError> {
327    match s.to_lowercase().as_str() {
328        "today" => Ok(DateValue::Today),
329        "yesterday" => Ok(DateValue::Yesterday),
330        "this-week" => Ok(DateValue::ThisWeek),
331        "this-month" => Ok(DateValue::ThisMonth),
332        _ => {
333            let date = NaiveDate::parse_from_str(s, "%Y-%m-%d")
334                .map_err(|_| ParseError::InvalidDate(s.to_string()))?;
335            Ok(DateValue::Specific(date))
336        }
337    }
338}
339
340fn parse_relative_duration_date(s: &str) -> Result<NaiveDate, ParseError> {
341    let input = s.trim().to_lowercase();
342    if input.len() < 2 {
343        return Err(ParseError::InvalidDate(s.to_string()));
344    }
345
346    let (amount, unit) = input.split_at(input.len() - 1);
347    let count = amount
348        .parse::<i64>()
349        .map_err(|_| ParseError::InvalidDate(s.to_string()))?;
350    let days = match unit {
351        "d" => count,
352        "w" => count * 7,
353        "m" => count * 30,
354        "y" => count * 365,
355        _ => return Err(ParseError::InvalidDate(s.to_string())),
356    };
357
358    Ok(Local::now().date_naive() - Duration::days(days))
359}
360
361fn parse_size_value(s: &str) -> Result<(SizeOp, u64), ParseError> {
362    let input = s.trim().to_lowercase();
363    if input.is_empty() {
364        return Err(ParseError::InvalidSize(s.to_string()));
365    }
366
367    let (op, rest) = if let Some(rest) = input.strip_prefix(">=") {
368        (SizeOp::GreaterThanOrEqual, rest)
369    } else if let Some(rest) = input.strip_prefix("<=") {
370        (SizeOp::LessThanOrEqual, rest)
371    } else if let Some(rest) = input.strip_prefix('>') {
372        (SizeOp::GreaterThan, rest)
373    } else if let Some(rest) = input.strip_prefix('<') {
374        (SizeOp::LessThan, rest)
375    } else if let Some(rest) = input.strip_prefix('=') {
376        (SizeOp::Equal, rest)
377    } else {
378        (SizeOp::Equal, input.as_str())
379    };
380
381    let number_end = rest
382        .find(|ch: char| !ch.is_ascii_digit() && ch != '.')
383        .unwrap_or(rest.len());
384    let (number_part, unit_part) = rest.split_at(number_end);
385    if number_part.is_empty() {
386        return Err(ParseError::InvalidSize(s.to_string()));
387    }
388
389    let value = number_part
390        .parse::<f64>()
391        .map_err(|_| ParseError::InvalidSize(s.to_string()))?;
392    let multiplier = match unit_part {
393        "" | "b" => 1_f64,
394        "k" | "kb" => 1024_f64,
395        "m" | "mb" => 1024_f64 * 1024_f64,
396        "g" | "gb" => 1024_f64 * 1024_f64 * 1024_f64,
397        other => return Err(ParseError::InvalidSize(other.to_string())),
398    };
399
400    Ok((op, (value * multiplier).round() as u64))
401}
402
// -- Public API ---------------------------------------------------------------
404
405pub fn parse_query(input: &str) -> Result<QueryNode, ParseError> {
406    let input = input.trim();
407    if input.is_empty() {
408        return Err(ParseError::UnexpectedEnd);
409    }
410    let tokens = tokenize(input)?;
411    if tokens.is_empty() {
412        return Err(ParseError::UnexpectedEnd);
413    }
414    let mut parser = Parser::new(tokens);
415    let node = parser.parse_expression()?;
416    if !parser.at_end() && matches!(parser.peek(), Some(Token::RParen)) {
417        return Err(ParseError::UnmatchedParen);
418    }
419    Ok(node)
420}
421
// -- Tests --------------------------------------------------------------------
423
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::NaiveDate;

    // -- Helpers: terse constructors for expected trees -----------------------

    /// Parses `query`, panicking with the parse error if it is rejected.
    #[track_caller]
    fn parsed(query: &str) -> QueryNode {
        parse_query(query).unwrap()
    }

    fn text(word: &str) -> QueryNode {
        QueryNode::Text(word.to_string())
    }

    fn field(field: QueryField, value: &str) -> QueryNode {
        QueryNode::Field {
            field,
            value: value.to_string(),
        }
    }

    fn filter(kind: FilterKind) -> QueryNode {
        QueryNode::Filter(kind)
    }

    fn date_range(bound: DateBound, date: DateValue) -> QueryNode {
        QueryNode::DateRange { bound, date }
    }

    fn ymd(year: i32, month: u32, day: u32) -> DateValue {
        DateValue::Specific(NaiveDate::from_ymd_opt(year, month, day).unwrap())
    }

    fn and(left: QueryNode, right: QueryNode) -> QueryNode {
        QueryNode::And(Box::new(left), Box::new(right))
    }

    fn or(left: QueryNode, right: QueryNode) -> QueryNode {
        QueryNode::Or(Box::new(left), Box::new(right))
    }

    fn not(inner: QueryNode) -> QueryNode {
        QueryNode::Not(Box::new(inner))
    }

    // -- Terms ----------------------------------------------------------------

    #[test]
    fn parse_single_word() {
        assert_eq!(parsed("deployment"), text("deployment"));
    }

    #[test]
    fn parse_phrase() {
        assert_eq!(
            parsed("\"deployment plan\""),
            QueryNode::Phrase("deployment plan".to_string())
        );
    }

    // -- Text fields ----------------------------------------------------------

    #[test]
    fn parse_from_field() {
        assert_eq!(
            parsed("from:alice@example.com"),
            field(QueryField::From, "alice@example.com")
        );
    }

    #[test]
    fn parse_to_field() {
        assert_eq!(parsed("to:bob"), field(QueryField::To, "bob"));
    }

    #[test]
    fn parse_cc_bcc_and_body_fields() {
        assert_eq!(
            parsed("cc:alice@example.com"),
            field(QueryField::Cc, "alice@example.com")
        );
        assert_eq!(
            parsed("bcc:hidden@example.com"),
            field(QueryField::Bcc, "hidden@example.com")
        );
        assert_eq!(
            parsed("body:\"deploy canary\""),
            field(QueryField::Body, "deploy canary")
        );
    }

    #[test]
    fn parse_subject_field() {
        assert_eq!(
            parsed("subject:invoice"),
            field(QueryField::Subject, "invoice")
        );
    }

    // -- Filters and labels ---------------------------------------------------

    #[test]
    fn parse_is_unread() {
        assert_eq!(parsed("is:unread"), filter(FilterKind::Unread));
    }

    #[test]
    fn parse_is_starred() {
        assert_eq!(parsed("is:starred"), filter(FilterKind::Starred));
    }

    #[test]
    fn parse_additional_is_filters() {
        let cases = vec![
            ("is:sent", FilterKind::Sent),
            ("is:draft", FilterKind::Draft),
            ("is:trash", FilterKind::Trash),
            ("is:spam", FilterKind::Spam),
            ("is:answered", FilterKind::Answered),
            ("is:inbox", FilterKind::Inbox),
            ("is:archived", FilterKind::Archived),
        ];
        for (query, kind) in cases {
            assert_eq!(parsed(query), filter(kind));
        }
    }

    #[test]
    fn parse_has_attachment() {
        assert_eq!(parsed("has:attachment"), filter(FilterKind::HasAttachment));
    }

    #[test]
    fn parse_label() {
        assert_eq!(parsed("label:work"), QueryNode::Label("work".to_string()));
    }

    // -- Dates ----------------------------------------------------------------

    #[test]
    fn parse_date_after() {
        assert_eq!(
            parsed("after:2026-01-01"),
            date_range(DateBound::After, ymd(2026, 1, 1))
        );
    }

    #[test]
    fn parse_date_before() {
        assert_eq!(
            parsed("before:2026-03-15"),
            date_range(DateBound::Before, ymd(2026, 3, 15))
        );
    }

    #[test]
    fn parse_date_today() {
        assert_eq!(
            parsed("date:today"),
            date_range(DateBound::Exact, DateValue::Today)
        );
    }

    #[test]
    fn parse_older_relative_duration() {
        let expected = Local::now().date_naive() - Duration::days(30);
        assert_eq!(
            parsed("older:30d"),
            date_range(DateBound::Before, DateValue::Specific(expected))
        );
    }

    #[test]
    fn parse_newer_relative_duration() {
        let expected = Local::now().date_naive() - Duration::days(14);
        assert_eq!(
            parsed("newer:2w"),
            date_range(DateBound::After, DateValue::Specific(expected))
        );
    }

    #[test]
    fn reject_invalid_relative_duration_unit() {
        assert_eq!(
            parse_query("older:30q"),
            Err(ParseError::InvalidDate("30q".to_string()))
        );
    }

    // -- Sizes ----------------------------------------------------------------

    #[test]
    fn parse_size_query() {
        assert_eq!(
            parsed("size:>5mb"),
            QueryNode::Size {
                op: SizeOp::GreaterThan,
                bytes: 5 * 1024 * 1024,
            }
        );
        assert_eq!(
            parsed("size:<=42kb"),
            QueryNode::Size {
                op: SizeOp::LessThanOrEqual,
                bytes: 42 * 1024,
            }
        );
    }

    #[test]
    fn reject_invalid_size_unit() {
        assert_eq!(
            parse_query("size:>5tb"),
            Err(ParseError::InvalidSize("tb".to_string()))
        );
    }

    // -- Boolean structure ----------------------------------------------------

    #[test]
    fn parse_implicit_and() {
        assert_eq!(parsed("invoice unread"), and(text("invoice"), text("unread")));
    }

    #[test]
    fn parse_explicit_and() {
        assert_eq!(
            parsed("invoice AND unread"),
            and(text("invoice"), text("unread"))
        );
    }

    #[test]
    fn parse_or() {
        assert_eq!(
            parsed("invoice OR receipt"),
            or(text("invoice"), text("receipt"))
        );
    }

    #[test]
    fn parse_not() {
        assert_eq!(parsed("-spam"), not(text("spam")));
        assert_eq!(parsed("NOT spam"), not(text("spam")));
    }

    #[test]
    fn parse_parentheses() {
        assert_eq!(
            parsed("(from:alice OR from:bob) is:unread"),
            and(
                or(
                    field(QueryField::From, "alice"),
                    field(QueryField::From, "bob"),
                ),
                filter(FilterKind::Unread),
            )
        );
    }

    #[test]
    fn parse_compound() {
        // Implicit AND folds to the left: And(And(And(from, subject), filter), date)
        let expected = and(
            and(
                and(
                    field(QueryField::From, "alice"),
                    field(QueryField::Subject, "invoice"),
                ),
                filter(FilterKind::Unread),
            ),
            date_range(DateBound::After, ymd(2026, 1, 1)),
        );
        assert_eq!(
            parsed("from:alice subject:invoice is:unread after:2026-01-01"),
            expected
        );
    }
}