1use crate::ast::*;
2use chrono::{Duration, Local, NaiveDate};
3use thiserror::Error;
4
/// Lexical unit produced by `tokenize` and consumed by `Parser`.
#[derive(Clone, Debug, PartialEq)]
enum Token {
    /// Bare run of characters that is not one of the uppercase keywords.
    Word(String),
    /// Text that appeared between double quotes, with the quotes removed.
    Phrase(String),
    /// `:` — separates a field name from its value.
    Colon,
    /// `-` at the start of a token — negates the term that follows.
    Minus,
    /// `(` — opens a group.
    LParen,
    /// `)` — closes a group.
    RParen,
    /// The uppercase keyword `AND`.
    And,
    /// The uppercase keyword `OR`.
    Or,
    /// The uppercase keyword `NOT`.
    Not,
}
19
20#[derive(Debug, Error, PartialEq)]
23pub enum ParseError {
24 #[error("unexpected end of input")]
25 UnexpectedEnd,
26 #[error("unexpected token: {0:?}")]
27 UnexpectedToken(String),
28 #[error("unmatched parenthesis")]
29 UnmatchedParen,
30 #[error("expected value after field")]
31 ExpectedValue,
32 #[error("unknown filter: {0}")]
33 UnknownFilter(String),
34 #[error("invalid size: {0}")]
35 InvalidSize(String),
36 #[error("invalid date: {0}")]
37 InvalidDate(String),
38}
39
40fn tokenize(input: &str) -> Result<Vec<Token>, ParseError> {
43 let mut tokens = Vec::new();
44 let mut chars = input.chars().peekable();
45
46 while let Some(&ch) = chars.peek() {
47 match ch {
48 ' ' | '\t' | '\n' | '\r' => {
49 chars.next();
50 }
51 '(' => {
52 chars.next();
53 tokens.push(Token::LParen);
54 }
55 ')' => {
56 chars.next();
57 tokens.push(Token::RParen);
58 }
59 ':' => {
60 chars.next();
61 tokens.push(Token::Colon);
62 }
63 '-' => {
64 chars.next();
65 tokens.push(Token::Minus);
66 }
67 '"' => {
68 chars.next();
69 let mut s = String::new();
70 loop {
71 match chars.next() {
72 Some('"') => break,
73 Some(c) => s.push(c),
74 None => break,
75 }
76 }
77 tokens.push(Token::Phrase(s));
78 }
79 _ => {
80 let mut word = String::new();
81 while let Some(&c) = chars.peek() {
82 if c.is_whitespace() || c == '(' || c == ')' || c == ':' || c == '"' {
83 break;
84 }
85 word.push(c);
86 chars.next();
87 }
88 match word.as_str() {
89 "AND" => tokens.push(Token::And),
90 "OR" => tokens.push(Token::Or),
91 "NOT" => tokens.push(Token::Not),
92 _ => tokens.push(Token::Word(word)),
93 }
94 }
95 }
96 }
97
98 Ok(tokens)
99}
100
101struct Parser {
104 tokens: Vec<Token>,
105 pos: usize,
106}
107
108impl Parser {
109 fn new(tokens: Vec<Token>) -> Self {
110 Self { tokens, pos: 0 }
111 }
112
113 fn peek(&self) -> Option<&Token> {
114 self.tokens.get(self.pos)
115 }
116
117 fn next(&mut self) -> Option<Token> {
118 let tok = self.tokens.get(self.pos).cloned();
119 if tok.is_some() {
120 self.pos += 1;
121 }
122 tok
123 }
124
125 fn at_end(&self) -> bool {
126 self.pos >= self.tokens.len()
127 }
128
129 fn parse_expression(&mut self) -> Result<QueryNode, ParseError> {
131 let mut left = self.parse_or()?;
132
133 while !self.at_end() {
134 if matches!(self.peek(), Some(Token::RParen)) {
136 break;
137 }
138 if matches!(self.peek(), Some(Token::Or)) {
140 break;
141 }
142 if matches!(self.peek(), Some(Token::And)) {
144 self.next();
145 }
146 if self.at_end() || matches!(self.peek(), Some(Token::RParen | Token::Or)) {
147 break;
148 }
149 let right = self.parse_or()?;
150 left = QueryNode::And(Box::new(left), Box::new(right));
151 }
152
153 Ok(left)
154 }
155
156 fn parse_or(&mut self) -> Result<QueryNode, ParseError> {
157 let mut left = self.parse_unary()?;
158
159 while matches!(self.peek(), Some(Token::Or)) {
160 self.next(); let right = self.parse_unary()?;
162 left = QueryNode::Or(Box::new(left), Box::new(right));
163 }
164
165 Ok(left)
166 }
167
168 fn parse_unary(&mut self) -> Result<QueryNode, ParseError> {
169 match self.peek() {
170 Some(Token::Minus) => {
171 self.next();
172 let node = self.parse_atom()?;
173 Ok(QueryNode::Not(Box::new(node)))
174 }
175 Some(Token::Not) => {
176 self.next();
177 let node = self.parse_atom()?;
178 Ok(QueryNode::Not(Box::new(node)))
179 }
180 _ => self.parse_atom(),
181 }
182 }
183
184 fn parse_atom(&mut self) -> Result<QueryNode, ParseError> {
185 match self.peek() {
186 Some(Token::LParen) => {
187 self.next(); let node = self.parse_expression()?;
189 match self.next() {
190 Some(Token::RParen) => Ok(node),
191 _ => Err(ParseError::UnmatchedParen),
192 }
193 }
194 Some(Token::Phrase(s)) => {
195 let s = s.clone();
196 self.next();
197 Ok(QueryNode::Phrase(s))
198 }
199 Some(Token::Word(_)) => {
200 if self.pos + 1 < self.tokens.len()
202 && matches!(self.tokens[self.pos + 1], Token::Colon)
203 {
204 return self.parse_field_value();
205 }
206 let word = match self.next() {
207 Some(Token::Word(w)) => w,
208 _ => unreachable!(),
209 };
210 Ok(QueryNode::Text(word))
211 }
212 Some(tok) => Err(ParseError::UnexpectedToken(format!("{:?}", tok))),
213 None => Err(ParseError::UnexpectedEnd),
214 }
215 }
216
217 fn parse_field_value(&mut self) -> Result<QueryNode, ParseError> {
218 let field_name = match self.next() {
219 Some(Token::Word(w)) => w,
220 _ => return Err(ParseError::UnexpectedEnd),
221 };
222
223 match self.next() {
225 Some(Token::Colon) => {}
226 _ => return Err(ParseError::ExpectedValue),
227 }
228
229 let value = match self.next() {
230 Some(Token::Word(w)) => w,
231 Some(Token::Phrase(p)) => p,
232 _ => return Err(ParseError::ExpectedValue),
233 };
234
235 match field_name.to_lowercase().as_str() {
236 "from" => Ok(QueryNode::Field {
237 field: QueryField::From,
238 value,
239 }),
240 "to" => Ok(QueryNode::Field {
241 field: QueryField::To,
242 value,
243 }),
244 "cc" => Ok(QueryNode::Field {
245 field: QueryField::Cc,
246 value,
247 }),
248 "bcc" => Ok(QueryNode::Field {
249 field: QueryField::Bcc,
250 value,
251 }),
252 "subject" => Ok(QueryNode::Field {
253 field: QueryField::Subject,
254 value,
255 }),
256 "body" => Ok(QueryNode::Field {
257 field: QueryField::Body,
258 value,
259 }),
260 "filename" => Ok(QueryNode::Field {
261 field: QueryField::Filename,
262 value,
263 }),
264 "label" => Ok(QueryNode::Label(value)),
265 "is" => match value.to_lowercase().as_str() {
266 "unread" => Ok(QueryNode::Filter(FilterKind::Unread)),
267 "read" => Ok(QueryNode::Filter(FilterKind::Read)),
268 "starred" => Ok(QueryNode::Filter(FilterKind::Starred)),
269 "draft" | "drafts" => Ok(QueryNode::Filter(FilterKind::Draft)),
270 "sent" => Ok(QueryNode::Filter(FilterKind::Sent)),
271 "trash" | "deleted" => Ok(QueryNode::Filter(FilterKind::Trash)),
272 "spam" | "junk" => Ok(QueryNode::Filter(FilterKind::Spam)),
273 "answered" | "replied" => Ok(QueryNode::Filter(FilterKind::Answered)),
274 "inbox" => Ok(QueryNode::Filter(FilterKind::Inbox)),
275 "archived" | "archive" => Ok(QueryNode::Filter(FilterKind::Archived)),
276 other => Err(ParseError::UnknownFilter(other.to_string())),
277 },
278 "has" => match value.to_lowercase().as_str() {
279 "attachment" | "attachments" => Ok(QueryNode::Filter(FilterKind::HasAttachment)),
280 other => Err(ParseError::UnknownFilter(other.to_string())),
281 },
282 "size" => {
283 let (op, bytes) = parse_size_value(&value)?;
284 Ok(QueryNode::Size { op, bytes })
285 }
286 "after" => {
287 let date = parse_date_value(&value)?;
288 Ok(QueryNode::DateRange {
289 bound: DateBound::After,
290 date,
291 })
292 }
293 "before" => {
294 let date = parse_date_value(&value)?;
295 Ok(QueryNode::DateRange {
296 bound: DateBound::Before,
297 date,
298 })
299 }
300 "date" => {
301 let date = parse_date_value(&value)?;
302 Ok(QueryNode::DateRange {
303 bound: DateBound::Exact,
304 date,
305 })
306 }
307 "older" => {
308 let date = parse_relative_duration_date(&value)?;
309 Ok(QueryNode::DateRange {
310 bound: DateBound::Before,
311 date: DateValue::Specific(date),
312 })
313 }
314 "newer" => {
315 let date = parse_relative_duration_date(&value)?;
316 Ok(QueryNode::DateRange {
317 bound: DateBound::After,
318 date: DateValue::Specific(date),
319 })
320 }
321 other => Err(ParseError::UnknownFilter(other.to_string())),
322 }
323 }
324}
325
326fn parse_date_value(s: &str) -> Result<DateValue, ParseError> {
327 match s.to_lowercase().as_str() {
328 "today" => Ok(DateValue::Today),
329 "yesterday" => Ok(DateValue::Yesterday),
330 "this-week" => Ok(DateValue::ThisWeek),
331 "this-month" => Ok(DateValue::ThisMonth),
332 _ => {
333 let date = NaiveDate::parse_from_str(s, "%Y-%m-%d")
334 .map_err(|_| ParseError::InvalidDate(s.to_string()))?;
335 Ok(DateValue::Specific(date))
336 }
337 }
338}
339
340fn parse_relative_duration_date(s: &str) -> Result<NaiveDate, ParseError> {
341 let input = s.trim().to_lowercase();
342 if input.len() < 2 {
343 return Err(ParseError::InvalidDate(s.to_string()));
344 }
345
346 let (amount, unit) = input.split_at(input.len() - 1);
347 let count = amount
348 .parse::<i64>()
349 .map_err(|_| ParseError::InvalidDate(s.to_string()))?;
350 let days = match unit {
351 "d" => count,
352 "w" => count * 7,
353 "m" => count * 30,
354 "y" => count * 365,
355 _ => return Err(ParseError::InvalidDate(s.to_string())),
356 };
357
358 Ok(Local::now().date_naive() - Duration::days(days))
359}
360
361fn parse_size_value(s: &str) -> Result<(SizeOp, u64), ParseError> {
362 let input = s.trim().to_lowercase();
363 if input.is_empty() {
364 return Err(ParseError::InvalidSize(s.to_string()));
365 }
366
367 let (op, rest) = if let Some(rest) = input.strip_prefix(">=") {
368 (SizeOp::GreaterThanOrEqual, rest)
369 } else if let Some(rest) = input.strip_prefix("<=") {
370 (SizeOp::LessThanOrEqual, rest)
371 } else if let Some(rest) = input.strip_prefix('>') {
372 (SizeOp::GreaterThan, rest)
373 } else if let Some(rest) = input.strip_prefix('<') {
374 (SizeOp::LessThan, rest)
375 } else if let Some(rest) = input.strip_prefix('=') {
376 (SizeOp::Equal, rest)
377 } else {
378 (SizeOp::Equal, input.as_str())
379 };
380
381 let number_end = rest
382 .find(|ch: char| !ch.is_ascii_digit() && ch != '.')
383 .unwrap_or(rest.len());
384 let (number_part, unit_part) = rest.split_at(number_end);
385 if number_part.is_empty() {
386 return Err(ParseError::InvalidSize(s.to_string()));
387 }
388
389 let value = number_part
390 .parse::<f64>()
391 .map_err(|_| ParseError::InvalidSize(s.to_string()))?;
392 let multiplier = match unit_part {
393 "" | "b" => 1_f64,
394 "k" | "kb" => 1024_f64,
395 "m" | "mb" => 1024_f64 * 1024_f64,
396 "g" | "gb" => 1024_f64 * 1024_f64 * 1024_f64,
397 other => return Err(ParseError::InvalidSize(other.to_string())),
398 };
399
400 Ok((op, (value * multiplier).round() as u64))
401}
402
403pub fn parse_query(input: &str) -> Result<QueryNode, ParseError> {
406 let input = input.trim();
407 if input.is_empty() {
408 return Err(ParseError::UnexpectedEnd);
409 }
410 let tokens = tokenize(input)?;
411 if tokens.is_empty() {
412 return Err(ParseError::UnexpectedEnd);
413 }
414 let mut parser = Parser::new(tokens);
415 let node = parser.parse_expression()?;
416 if !parser.at_end() && matches!(parser.peek(), Some(Token::RParen)) {
417 return Err(ParseError::UnmatchedParen);
418 }
419 Ok(node)
420}
421
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::NaiveDate;

    /// Parses `query`, panicking if the parser rejects it.
    fn parsed(query: &str) -> QueryNode {
        parse_query(query).unwrap()
    }

    /// Builds a free-text node.
    fn text(word: &str) -> QueryNode {
        QueryNode::Text(word.to_string())
    }

    /// Builds a `field:value` node.
    fn field(field: QueryField, value: &str) -> QueryNode {
        QueryNode::Field {
            field,
            value: value.to_string(),
        }
    }

    /// Builds a filter node.
    fn filter(kind: FilterKind) -> QueryNode {
        QueryNode::Filter(kind)
    }

    /// Builds a date-range node.
    fn date_range(bound: DateBound, date: DateValue) -> QueryNode {
        QueryNode::DateRange { bound, date }
    }

    /// Builds a specific calendar date value.
    fn on(year: i32, month: u32, day: u32) -> DateValue {
        DateValue::Specific(NaiveDate::from_ymd_opt(year, month, day).unwrap())
    }

    /// Builds a conjunction node.
    fn and(left: QueryNode, right: QueryNode) -> QueryNode {
        QueryNode::And(Box::new(left), Box::new(right))
    }

    /// Builds a disjunction node.
    fn or(left: QueryNode, right: QueryNode) -> QueryNode {
        QueryNode::Or(Box::new(left), Box::new(right))
    }

    /// Builds a negation node.
    fn not(inner: QueryNode) -> QueryNode {
        QueryNode::Not(Box::new(inner))
    }

    #[test]
    fn parse_single_word() {
        assert_eq!(parsed("deployment"), text("deployment"));
    }

    #[test]
    fn parse_phrase() {
        assert_eq!(
            parsed("\"deployment plan\""),
            QueryNode::Phrase("deployment plan".to_string())
        );
    }

    #[test]
    fn parse_from_field() {
        assert_eq!(
            parsed("from:alice@example.com"),
            field(QueryField::From, "alice@example.com")
        );
    }

    #[test]
    fn parse_to_field() {
        assert_eq!(parsed("to:bob"), field(QueryField::To, "bob"));
    }

    #[test]
    fn parse_cc_bcc_and_body_fields() {
        assert_eq!(
            parsed("cc:alice@example.com"),
            field(QueryField::Cc, "alice@example.com")
        );
        assert_eq!(
            parsed("bcc:hidden@example.com"),
            field(QueryField::Bcc, "hidden@example.com")
        );
        assert_eq!(
            parsed("body:\"deploy canary\""),
            field(QueryField::Body, "deploy canary")
        );
    }

    #[test]
    fn parse_subject_field() {
        assert_eq!(
            parsed("subject:invoice"),
            field(QueryField::Subject, "invoice")
        );
    }

    #[test]
    fn parse_is_unread() {
        assert_eq!(parsed("is:unread"), filter(FilterKind::Unread));
    }

    #[test]
    fn parse_is_starred() {
        assert_eq!(parsed("is:starred"), filter(FilterKind::Starred));
    }

    #[test]
    fn parse_additional_is_filters() {
        assert_eq!(parsed("is:sent"), filter(FilterKind::Sent));
        assert_eq!(parsed("is:draft"), filter(FilterKind::Draft));
        assert_eq!(parsed("is:trash"), filter(FilterKind::Trash));
        assert_eq!(parsed("is:spam"), filter(FilterKind::Spam));
        assert_eq!(parsed("is:answered"), filter(FilterKind::Answered));
        assert_eq!(parsed("is:inbox"), filter(FilterKind::Inbox));
        assert_eq!(parsed("is:archived"), filter(FilterKind::Archived));
    }

    #[test]
    fn parse_has_attachment() {
        assert_eq!(parsed("has:attachment"), filter(FilterKind::HasAttachment));
    }

    #[test]
    fn parse_label() {
        assert_eq!(parsed("label:work"), QueryNode::Label("work".to_string()));
    }

    #[test]
    fn parse_date_after() {
        assert_eq!(
            parsed("after:2026-01-01"),
            date_range(DateBound::After, on(2026, 1, 1))
        );
    }

    #[test]
    fn parse_date_before() {
        assert_eq!(
            parsed("before:2026-03-15"),
            date_range(DateBound::Before, on(2026, 3, 15))
        );
    }

    #[test]
    fn parse_date_today() {
        assert_eq!(
            parsed("date:today"),
            date_range(DateBound::Exact, DateValue::Today)
        );
    }

    #[test]
    fn parse_older_relative_duration() {
        let expected = Local::now().date_naive() - Duration::days(30);
        assert_eq!(
            parsed("older:30d"),
            date_range(DateBound::Before, DateValue::Specific(expected))
        );
    }

    #[test]
    fn parse_newer_relative_duration() {
        let expected = Local::now().date_naive() - Duration::days(14);
        assert_eq!(
            parsed("newer:2w"),
            date_range(DateBound::After, DateValue::Specific(expected))
        );
    }

    #[test]
    fn reject_invalid_relative_duration_unit() {
        assert_eq!(
            parse_query("older:30q"),
            Err(ParseError::InvalidDate("30q".to_string()))
        );
    }

    #[test]
    fn parse_size_query() {
        assert_eq!(
            parsed("size:>5mb"),
            QueryNode::Size {
                op: SizeOp::GreaterThan,
                bytes: 5 * 1024 * 1024,
            }
        );
        assert_eq!(
            parsed("size:<=42kb"),
            QueryNode::Size {
                op: SizeOp::LessThanOrEqual,
                bytes: 42 * 1024,
            }
        );
    }

    #[test]
    fn reject_invalid_size_unit() {
        assert_eq!(
            parse_query("size:>5tb"),
            Err(ParseError::InvalidSize("tb".to_string()))
        );
    }

    #[test]
    fn parse_implicit_and() {
        assert_eq!(parsed("invoice unread"), and(text("invoice"), text("unread")));
    }

    #[test]
    fn parse_explicit_and() {
        assert_eq!(
            parsed("invoice AND unread"),
            and(text("invoice"), text("unread"))
        );
    }

    #[test]
    fn parse_or() {
        assert_eq!(
            parsed("invoice OR receipt"),
            or(text("invoice"), text("receipt"))
        );
    }

    #[test]
    fn parse_not() {
        assert_eq!(parsed("-spam"), not(text("spam")));
        assert_eq!(parsed("NOT spam"), not(text("spam")));
    }

    #[test]
    fn parse_parentheses() {
        let senders = or(
            field(QueryField::From, "alice"),
            field(QueryField::From, "bob"),
        );
        assert_eq!(
            parsed("(from:alice OR from:bob) is:unread"),
            and(senders, filter(FilterKind::Unread))
        );
    }

    #[test]
    fn parse_compound() {
        // Adjacent terms fold left-associatively: ((from AND subject) AND is) AND after.
        let from_and_subject = and(
            field(QueryField::From, "alice"),
            field(QueryField::Subject, "invoice"),
        );
        let with_unread = and(from_and_subject, filter(FilterKind::Unread));
        let expected = and(with_unread, date_range(DateBound::After, on(2026, 1, 1)));

        assert_eq!(
            parsed("from:alice subject:invoice is:unread after:2026-01-01"),
            expected
        );
    }
}