sieve/compiler/grammar/
mod.rs

1/*
2 * SPDX-FileCopyrightText: 2020 Stalwart Labs Ltd <hello@stalw.art>
3 *
4 * SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-SEL
5 */
6
7use std::fmt::Display;
8
9use self::{expr::Expression, instruction::CompilerState};
10
11use super::{
12    lexer::{tokenizer::TokenInfo, word::Word, Token},
13    CompileError, ErrorType, Regex, Value,
14};
15
16pub mod actions;
17pub mod expr;
18pub mod instruction;
19pub mod test;
20pub mod tests;
21
/// Identifier of a Sieve capability.
///
/// Known capability names are mapped to variants by `lookup_capabilities`
/// and rendered back to text by the `Display` implementation. Names that are
/// not recognised are kept verbatim by `Capability::parse`, either in
/// [`Capability::Other`] or, when they start with `comparator-`, in
/// [`Capability::Comparator`] wrapping `Comparator::Other`.
22#[derive(Debug, Clone, PartialEq, Eq, Hash)]
23#[cfg_attr(
24    any(test, feature = "serde"),
25    derive(serde::Serialize, serde::Deserialize)
26)]
27#[cfg_attr(
28    feature = "rkyv",
29    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
30)]
31pub enum Capability {
32    Envelope,
33    EnvelopeDsn,
34    EnvelopeDeliverBy,
35    FileInto,
36    EncodedCharacter,
    /// A `comparator-*` capability; the payload identifies the comparator.
37    Comparator(Comparator),
    /// A capability name that `lookup_capabilities` does not recognise,
    /// stored as given.
38    Other(String),
39    Body,
40    Convert,
41    Copy,
42    Relational,
43    Date,
44    Index,
45    Duplicate,
46    Variables,
47    EditHeader,
48    ForEveryPart,
49    Mime,
50    Replace,
51    Enclose,
52    ExtractText,
53    Enotify,
54    RedirectDsn,
55    RedirectDeliverBy,
56    Environment,
57    Reject,
58    Ereject,
59    ExtLists,
60    SubAddress,
61    Vacation,
62    VacationSeconds,
63    Fcc,
64    Mailbox,
65    MailboxId,
66    MboxMetadata,
67    ServerMetadata,
68    SpecialUse,
69    Imap4Flags,
70    Ihave,
71    ImapSieve,
72    Include,
73    Regex,
74    SpamTest,
75    SpamTestPlus,
76    VirusTest,
77
78    // Extensions
    // These two are named `vnd.stalwart.expressions` and `vnd.stalwart.while`
    // in `lookup_capabilities`/`Display`, and are not part of
    // `Capability::all()`.
79    Expressions,
80    While,
81}
82
/// Selects which part of an address is looked at.
///
/// NOTE(review): nothing in this file constructs or consumes this type; the
/// variants presumably correspond to the `:localpart`, `:domain`, `:all`,
/// `:user` and `:detail` address-part tags (plus a display-name selector) —
/// confirm against the test parser.
83#[derive(Debug, Clone, Copy, PartialEq, Eq)]
84#[cfg_attr(
85    any(test, feature = "serde"),
86    derive(serde::Serialize, serde::Deserialize)
87)]
88#[cfg_attr(
89    feature = "rkyv",
90    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
91)]
92pub enum AddressPart {
93    LocalPart,
94    Domain,
95    All,
96    User,
97    Detail,
98    Name,
99}
100
/// Match type of a test, as produced by `CompilerState::parse_match_type`.
101#[derive(Debug, Clone, Copy, PartialEq, Eq)]
102#[cfg_attr(
103    any(test, feature = "serde"),
104    derive(serde::Serialize, serde::Deserialize)
105)]
106#[cfg_attr(
107    feature = "rkyv",
108    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
109)]
110pub(crate) enum MatchType {
111    Is,
112    Contains,
    /// Created with a payload of `0` by `parse_match_type`.
    /// NOTE(review): the `u64` is presumably filled in later (match-variable
    /// bookkeeping?) — confirm where it is updated.
113    Matches(u64),
    /// Created with a payload of `0` by `parse_match_type`; see the note on
    /// `Matches`.
114    Regex(u64),
    /// Relational comparison selected by the `Value` word.
115    Value(RelationalMatch),
    /// Relational comparison selected by any other word that reaches the
    /// relational branch of `parse_match_type`.
116    Count(RelationalMatch),
117    List,
118}
119
/// Relational operator carried by `MatchType::Value` and `MatchType::Count`.
///
/// `lookup_relational` maps the strings `"gt"`, `"ge"`, `"lt"`, `"le"`,
/// `"eq"` and `"ne"` to the variant of the same name.
120#[derive(Debug, Clone, Copy, PartialEq, Eq)]
121#[cfg_attr(
122    any(test, feature = "serde"),
123    derive(serde::Serialize, serde::Deserialize)
124)]
125#[cfg_attr(
126    feature = "rkyv",
127    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
128)]
129pub(crate) enum RelationalMatch {
130    Gt,
131    Ge,
132    Lt,
133    Le,
134    Eq,
135    Ne,
136}
137
/// Comparator identifier.
///
/// `lookup_comparator` recognises `i;octet`, `i;ascii-casemap` and
/// `i;ascii-numeric`; any other comparator name is kept in
/// [`Comparator::Other`].
138#[derive(Debug, Clone, PartialEq, Eq, Hash)]
139#[cfg_attr(
140    any(test, feature = "serde"),
141    derive(serde::Serialize, serde::Deserialize)
142)]
143#[cfg_attr(
144    feature = "rkyv",
145    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
146)]
147pub enum Comparator {
    /// Only produced from the capability name `comparator-elbonia`;
    /// `lookup_comparator` has no entry for it.
148    Elbonia,
149    Octet,
150    AsciiCaseMap,
151    AsciiNumeric,
    /// Comparator name without the `comparator-` capability prefix.
152    Other(String),
153}
154
/// Payload describing which variables to clear.
///
/// NOTE(review): no producer or consumer of this type is visible in this
/// file; the field descriptions below are inferred from the names only —
/// confirm against the runtime.
155#[derive(Debug, Clone, PartialEq, Eq)]
156#[cfg_attr(
157    any(test, feature = "serde"),
158    derive(serde::Serialize, serde::Deserialize)
159)]
160#[cfg_attr(
161    feature = "rkyv",
162    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
163)]
164pub struct Clear {
    // Presumably the index of the first local variable to clear — TODO confirm.
165    pub(crate) local_vars_idx: u32,
    // Presumably the number of local variables to clear — TODO confirm.
166    pub(crate) local_vars_num: u32,
    // Presumably a bit set of match variables to clear — TODO confirm.
167    pub(crate) match_vars: u64,
168}
169
/// Record of an invalid item: its name and the source position it was
/// found at.
///
/// The fields are crate-private; outside the crate they are read through
/// `Invalid::name`, `Invalid::line_num` and `Invalid::line_pos`.
170#[derive(Debug, Clone, PartialEq, Eq)]
171#[cfg_attr(
172    any(test, feature = "serde"),
173    derive(serde::Serialize, serde::Deserialize)
174)]
175#[cfg_attr(
176    feature = "rkyv",
177    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
178)]
179pub struct Invalid {
180    pub(crate) name: String,
181    pub(crate) line_num: usize,
182    pub(crate) line_pos: usize,
183}
184
/// Payload of a `while` loop instruction.
///
/// NOTE(review): no user of this type is visible in this file; `expr` is
/// presumably the loop condition and `jz_pos` the instruction index to jump
/// to when it evaluates to zero/false — confirm against the runtime.
185#[derive(Debug, Eq, PartialEq, Clone)]
186#[cfg_attr(
187    any(test, feature = "serde"),
188    derive(serde::Serialize, serde::Deserialize)
189)]
190#[cfg_attr(
191    feature = "rkyv",
192    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
193)]
194pub(crate) struct While {
195    pub expr: Vec<Expression>,
196    pub jz_pos: usize,
197}
198
199impl CompilerState<'_> {
200    #[inline(always)]
201    pub fn expect_instruction_end(&mut self) -> Result<(), CompileError> {
202        self.tokens.expect_token(Token::Semicolon)
203    }
204
205    pub fn ignore_instruction(&mut self) -> Result<(), CompileError> {
206        // Skip entire instruction
207        let mut curly_count = 0;
208        loop {
209            let token_info = self.tokens.unwrap_next()?;
210            match token_info.token {
211                Token::Semicolon if curly_count == 0 => {
212                    break;
213                }
214                Token::CurlyOpen => {
215                    curly_count += 1;
216                }
217                Token::CurlyClose => match curly_count {
218                    0 => {
219                        return Err(token_info.expected("instruction"));
220                    }
221                    1 => {
222                        break;
223                    }
224                    _ => curly_count -= 1,
225                },
226                _ => (),
227            }
228        }
229
230        Ok(())
231    }
232
233    pub fn ignore_test(&mut self) -> Result<(), CompileError> {
234        let mut d_count = 0;
235        while let Some(token_info) = self.tokens.peek() {
236            match token_info?.token {
237                Token::ParenthesisOpen => {
238                    d_count += 1;
239                }
240                Token::ParenthesisClose => {
241                    if d_count == 0 {
242                        break;
243                    } else {
244                        d_count -= 1;
245                    }
246                }
247                Token::Comma => {
248                    if d_count == 0 {
249                        break;
250                    }
251                }
252                Token::CurlyOpen => {
253                    break;
254                }
255                _ => (),
256            }
257            self.tokens.next();
258        }
259
260        Ok(())
261    }
262
263    pub fn parse_match_type(&mut self, word: Word) -> Result<MatchType, CompileError> {
264        match word {
265            Word::Is => Ok(MatchType::Is),
266            Word::Contains => Ok(MatchType::Contains),
267            Word::Matches => {
268                self.block.match_test_pos.push(self.instructions.len());
269                Ok(MatchType::Matches(0))
270            }
271            Word::Regex => {
272                self.block.match_test_pos.push(self.instructions.len());
273                Ok(MatchType::Regex(0))
274            }
275            Word::List => Ok(MatchType::List),
276            _ => {
277                let token_info = self.tokens.unwrap_next()?;
278                if let Token::StringConstant(text) = &token_info.token {
279                    if let Some(relational) = lookup_relational(text.to_string().as_ref()) {
280                        return Ok(if word == Word::Value {
281                            MatchType::Value(relational)
282                        } else {
283                            MatchType::Count(relational)
284                        });
285                    }
286                }
287                Err(token_info.expected("relational match"))
288            }
289        }
290    }
291
292    pub(crate) fn parse_comparator(&mut self) -> Result<Comparator, CompileError> {
293        let comparator = self.tokens.expect_static_string()?;
294        Ok(if let Some(comparator) = lookup_comparator(&comparator) {
295            comparator
296        } else {
297            Comparator::Other(comparator)
298        })
299    }
300
301    pub(crate) fn parse_static_strings(&mut self) -> Result<Vec<String>, CompileError> {
302        let token_info = self.tokens.unwrap_next()?;
303        match token_info.token {
304            Token::BracketOpen => {
305                let mut strings = Vec::new();
306                loop {
307                    let token_info = self.tokens.unwrap_next()?;
308                    match token_info.token {
309                        Token::StringConstant(string) => {
310                            strings.push(string.into_string());
311                        }
312                        Token::Comma => (),
313                        Token::BracketClose if !strings.is_empty() => break,
314                        _ => return Err(token_info.expected("constant string")),
315                    }
316                }
317                Ok(strings)
318            }
319            Token::StringConstant(string) => Ok(vec![string.into_string()]),
320            _ => Err(token_info.expected("'[' or constant string")),
321        }
322    }
323
324    pub fn parse_string(&mut self) -> Result<Value, CompileError> {
325        let next_token = self.tokens.unwrap_next()?;
326        match next_token.token {
327            Token::StringConstant(s) => Ok(Value::from(s)),
328            Token::StringVariable(s) => {
329                self.tokenize_string(&s, true)
330                    .map_err(|error_type| CompileError {
331                        line_num: next_token.line_num,
332                        line_pos: next_token.line_pos,
333                        error_type,
334                    })
335            }
336            Token::BracketOpen => {
337                let mut items = self.parse_string_list(false)?;
338                match items.pop() {
339                    Some(s) if items.is_empty() => Ok(s),
340                    _ => Err(next_token.expected("string")),
341                }
342            }
343            _ => Err(next_token.expected("string")),
344        }
345    }
346
347    pub(crate) fn parse_strings(&mut self, allow_empty: bool) -> Result<Vec<Value>, CompileError> {
348        let token_info = self.tokens.unwrap_next()?;
349        match token_info.token {
350            Token::BracketOpen => self.parse_string_list(allow_empty),
351            Token::StringConstant(s) => Ok(vec![Value::from(s)]),
352            Token::StringVariable(s) => {
353                self.tokenize_string(&s, true)
354                    .map(|s| vec![s])
355                    .map_err(|error_type| CompileError {
356                        line_num: token_info.line_num,
357                        line_pos: token_info.line_pos,
358                        error_type,
359                    })
360            }
361            _ => Err(token_info.expected("'[' or string")),
362        }
363    }
364
365    pub(crate) fn parse_string_token(
366        &mut self,
367        token_info: TokenInfo,
368    ) -> Result<Value, CompileError> {
369        match token_info.token {
370            Token::StringConstant(s) => Ok(Value::from(s)),
371            Token::StringVariable(s) => {
372                self.tokenize_string(&s, true)
373                    .map_err(|error_type| CompileError {
374                        line_num: token_info.line_num,
375                        line_pos: token_info.line_pos,
376                        error_type,
377                    })
378            }
379            _ => Err(token_info.expected("string")),
380        }
381    }
382
383    pub(crate) fn parse_strings_token(
384        &mut self,
385        token_info: TokenInfo,
386    ) -> Result<Vec<Value>, CompileError> {
387        match token_info.token {
388            Token::StringConstant(s) => Ok(vec![Value::from(s)]),
389            Token::StringVariable(s) => {
390                self.tokenize_string(&s, true)
391                    .map(|s| vec![s])
392                    .map_err(|error_type| CompileError {
393                        line_num: token_info.line_num,
394                        line_pos: token_info.line_pos,
395                        error_type,
396                    })
397            }
398            Token::BracketOpen => self.parse_string_list(false),
399            _ => Err(token_info.expected("string")),
400        }
401    }
402
403    pub(crate) fn parse_string_list(
404        &mut self,
405        allow_empty: bool,
406    ) -> Result<Vec<Value>, CompileError> {
407        let mut strings = Vec::new();
408        loop {
409            let token_info = self.tokens.unwrap_next()?;
410            match token_info.token {
411                Token::StringConstant(s) => {
412                    strings.push(Value::from(s));
413                }
414                Token::StringVariable(s) => {
415                    strings.push(self.tokenize_string(&s, true).map_err(|error_type| {
416                        CompileError {
417                            line_num: token_info.line_num,
418                            line_pos: token_info.line_pos,
419                            error_type,
420                        }
421                    })?);
422                }
423                Token::Comma => (),
424                Token::BracketClose if !strings.is_empty() || allow_empty => break,
425                _ => return Err(token_info.expected("string or string list")),
426            }
427        }
428        Ok(strings)
429    }
430
431    #[inline(always)]
432    pub(crate) fn has_capability(&self, capability: &Capability) -> bool {
433        [&self.block]
434            .into_iter()
435            .chain(self.block_stack.iter())
436            .any(|b| b.capabilities.contains(capability))
437            || (capability != &Capability::Ihave && self.compiler.no_capability_check)
438    }
439
440    #[inline(always)]
441    pub(crate) fn reset_param_check(&mut self) {
442        self.param_check.fill(false);
443    }
444
445    #[inline(always)]
446    pub(crate) fn validate_argument(
447        &mut self,
448        arg_num: usize,
449        capability: Option<Capability>,
450        line_num: usize,
451        line_pos: usize,
452    ) -> Result<(), CompileError> {
453        if arg_num > 0 {
454            if let Some(param) = self.param_check.get_mut(arg_num - 1) {
455                if !*param {
456                    *param = true;
457                } else {
458                    return Err(CompileError {
459                        line_num,
460                        line_pos,
461                        error_type: ErrorType::DuplicatedParameter,
462                    });
463                }
464            } else {
465                #[cfg(test)]
466                panic!("Argument out of range {arg_num}");
467            }
468        }
469        if let Some(capability) = capability {
470            if !self.has_capability(&capability) {
471                return Err(CompileError {
472                    line_num,
473                    line_pos,
474                    error_type: ErrorType::UndeclaredCapability(capability),
475                });
476            }
477        }
478
479        Ok(())
480    }
481
482    pub(crate) fn validate_match(
483        &mut self,
484        match_type: &MatchType,
485        key_list: &mut [Value],
486    ) -> Result<(), CompileError> {
487        if matches!(match_type, MatchType::Regex(_)) {
488            for key in key_list {
489                if let Value::Text(expr) = key {
490                    match fancy_regex::Regex::new(expr) {
491                        Ok(regex) => {
492                            *key = Value::Regex(Regex::new(expr.to_string(), regex));
493                        }
494                        Err(err) => {
495                            return Err(self
496                                .tokens
497                                .unwrap_next()?
498                                .custom(ErrorType::InvalidRegex(format!("{expr}: {err}"))));
499                        }
500                    }
501                }
502            }
503        }
504        Ok(())
505    }
506}
507
508impl Capability {
509    pub fn parse(capability: &str) -> Capability {
510        if let Some(capability) = lookup_capabilities(capability) {
511            capability
512        } else if let Some(comparator) = capability.strip_prefix("comparator-") {
513            Capability::Comparator(Comparator::Other(comparator.to_string()))
514        } else {
515            Capability::Other(capability.to_string())
516        }
517    }
518
519    pub fn all() -> &'static [Capability] {
520        &[
521            Capability::Envelope,
522            Capability::EnvelopeDsn,
523            Capability::EnvelopeDeliverBy,
524            Capability::FileInto,
525            Capability::EncodedCharacter,
526            Capability::Comparator(Comparator::Elbonia),
527            Capability::Comparator(Comparator::AsciiCaseMap),
528            Capability::Comparator(Comparator::AsciiNumeric),
529            Capability::Comparator(Comparator::Octet),
530            Capability::Body,
531            Capability::Convert,
532            Capability::Copy,
533            Capability::Relational,
534            Capability::Date,
535            Capability::Index,
536            Capability::Duplicate,
537            Capability::Variables,
538            Capability::EditHeader,
539            Capability::ForEveryPart,
540            Capability::Mime,
541            Capability::Replace,
542            Capability::Enclose,
543            Capability::ExtractText,
544            Capability::Enotify,
545            Capability::RedirectDsn,
546            Capability::RedirectDeliverBy,
547            Capability::Environment,
548            Capability::Reject,
549            Capability::Ereject,
550            Capability::ExtLists,
551            Capability::SubAddress,
552            Capability::Vacation,
553            Capability::VacationSeconds,
554            Capability::Fcc,
555            Capability::Mailbox,
556            Capability::MailboxId,
557            Capability::MboxMetadata,
558            Capability::ServerMetadata,
559            Capability::SpecialUse,
560            Capability::Imap4Flags,
561            Capability::Ihave,
562            Capability::ImapSieve,
563            Capability::Include,
564            Capability::Regex,
565            Capability::SpamTest,
566            Capability::SpamTestPlus,
567            Capability::VirusTest,
568        ]
569    }
570}
571
/// Maps a relational operator name to its [`RelationalMatch`] variant.
///
/// Recognises exactly the keys listed below; any other input yields `None`.
/// The lookup is performed on the bytes of `input` by `hashify::tiny_map!`.
572fn lookup_relational(input: &str) -> Option<RelationalMatch> {
573    hashify::tiny_map!(
574        input.as_bytes(),
575        "gt" => RelationalMatch::Gt,
576        "ge" => RelationalMatch::Ge,
577        "lt" => RelationalMatch::Lt,
578        "le" => RelationalMatch::Le,
579        "eq" => RelationalMatch::Eq,
580        "ne" => RelationalMatch::Ne,
581    )
582}
583
/// Maps a comparator name to its dedicated [`Comparator`] variant.
///
/// Only the three keys listed below are recognised; any other input yields
/// `None` (callers such as `parse_comparator` then fall back to
/// `Comparator::Other`). Note that there is no key for
/// `Comparator::Elbonia`: that variant is only produced from the capability
/// name `comparator-elbonia` in `lookup_capabilities`.
584fn lookup_comparator(input: &str) -> Option<Comparator> {
585    hashify::tiny_map!(
586        input.as_bytes(),
587        "i;octet" => Comparator::Octet,
588        "i;ascii-casemap" => Comparator::AsciiCaseMap,
589        "i;ascii-numeric" => Comparator::AsciiNumeric,
590    )
591}
592
593impl Invalid {
594    pub fn name(&self) -> &str {
595        &self.name
596    }
597
598    pub fn line_num(&self) -> usize {
599        self.line_num
600    }
601
602    pub fn line_pos(&self) -> usize {
603        self.line_pos
604    }
605}
606
607impl From<&str> for Capability {
608    fn from(value: &str) -> Self {
609        Capability::parse(value)
610    }
611}
612
613impl From<String> for Capability {
614    fn from(value: String) -> Self {
615        Capability::parse(&value)
616    }
617}
618
619impl Display for Capability {
620    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
621        match self {
622            Capability::Envelope => f.write_str("envelope"),
623            Capability::EnvelopeDsn => f.write_str("envelope-dsn"),
624            Capability::EnvelopeDeliverBy => f.write_str("envelope-deliverby"),
625            Capability::FileInto => f.write_str("fileinto"),
626            Capability::EncodedCharacter => f.write_str("encoded-character"),
627            Capability::Comparator(Comparator::Elbonia) => f.write_str("comparator-elbonia"),
628            Capability::Comparator(Comparator::Octet) => f.write_str("comparator-i;octet"),
629            Capability::Comparator(Comparator::AsciiCaseMap) => {
630                f.write_str("comparator-i;ascii-casemap")
631            }
632            Capability::Comparator(Comparator::AsciiNumeric) => {
633                f.write_str("comparator-i;ascii-numeric")
634            }
635            Capability::Comparator(Comparator::Other(comparator)) => f.write_str(comparator),
636            Capability::Body => f.write_str("body"),
637            Capability::Convert => f.write_str("convert"),
638            Capability::Copy => f.write_str("copy"),
639            Capability::Relational => f.write_str("relational"),
640            Capability::Date => f.write_str("date"),
641            Capability::Index => f.write_str("index"),
642            Capability::Duplicate => f.write_str("duplicate"),
643            Capability::Variables => f.write_str("variables"),
644            Capability::EditHeader => f.write_str("editheader"),
645            Capability::ForEveryPart => f.write_str("foreverypart"),
646            Capability::Mime => f.write_str("mime"),
647            Capability::Replace => f.write_str("replace"),
648            Capability::Enclose => f.write_str("enclose"),
649            Capability::ExtractText => f.write_str("extracttext"),
650            Capability::Enotify => f.write_str("enotify"),
651            Capability::RedirectDsn => f.write_str("redirect-dsn"),
652            Capability::RedirectDeliverBy => f.write_str("redirect-deliverby"),
653            Capability::Environment => f.write_str("environment"),
654            Capability::Reject => f.write_str("reject"),
655            Capability::Ereject => f.write_str("ereject"),
656            Capability::ExtLists => f.write_str("extlists"),
657            Capability::SubAddress => f.write_str("subaddress"),
658            Capability::Vacation => f.write_str("vacation"),
659            Capability::VacationSeconds => f.write_str("vacation-seconds"),
660            Capability::Fcc => f.write_str("fcc"),
661            Capability::Mailbox => f.write_str("mailbox"),
662            Capability::MailboxId => f.write_str("mailboxid"),
663            Capability::MboxMetadata => f.write_str("mboxmetadata"),
664            Capability::ServerMetadata => f.write_str("servermetadata"),
665            Capability::SpecialUse => f.write_str("special-use"),
666            Capability::Imap4Flags => f.write_str("imap4flags"),
667            Capability::Ihave => f.write_str("ihave"),
668            Capability::ImapSieve => f.write_str("imapsieve"),
669            Capability::Include => f.write_str("include"),
670            Capability::Regex => f.write_str("regex"),
671            Capability::SpamTest => f.write_str("spamtest"),
672            Capability::SpamTestPlus => f.write_str("spamtestplus"),
673            Capability::VirusTest => f.write_str("virustest"),
674            Capability::While => f.write_str("vnd.stalwart.while"),
675            Capability::Expressions => f.write_str("vnd.stalwart.expressions"),
676            Capability::Other(capability) => f.write_str(capability),
677        }
678    }
679}
680
/// Maps a capability name to its dedicated [`Capability`] variant.
///
/// Only the keys listed below are recognised; any other input yields `None`
/// (`Capability::parse` then falls back to `Comparator::Other` or
/// `Capability::Other`). The lookup is performed on the bytes of `input` by
/// `hashify::tiny_map!`.
681fn lookup_capabilities(input: &str) -> Option<Capability> {
682    hashify::tiny_map!(
683        input.as_bytes(),
684        "envelope" => Capability::Envelope,
685        "envelope-dsn" => Capability::EnvelopeDsn,
686        "envelope-deliverby" => Capability::EnvelopeDeliverBy,
687        "fileinto" => Capability::FileInto,
688        "encoded-character" => Capability::EncodedCharacter,
689        "comparator-elbonia" => Capability::Comparator(Comparator::Elbonia),
690        "comparator-i;octet" => Capability::Comparator(Comparator::Octet),
691        "comparator-i;ascii-casemap" => Capability::Comparator(Comparator::AsciiCaseMap),
692        "comparator-i;ascii-numeric" => Capability::Comparator(Comparator::AsciiNumeric),
693        "body" => Capability::Body,
694        "convert" => Capability::Convert,
695        "copy" => Capability::Copy,
696        "relational" => Capability::Relational,
697        "date" => Capability::Date,
698        "index" => Capability::Index,
699        "duplicate" => Capability::Duplicate,
700        "variables" => Capability::Variables,
701        "editheader" => Capability::EditHeader,
702        "foreverypart" => Capability::ForEveryPart,
703        "mime" => Capability::Mime,
704        "replace" => Capability::Replace,
705        "enclose" => Capability::Enclose,
706        "extracttext" => Capability::ExtractText,
707        "enotify" => Capability::Enotify,
708        "redirect-dsn" => Capability::RedirectDsn,
709        "redirect-deliverby" => Capability::RedirectDeliverBy,
710        "environment" => Capability::Environment,
711        "reject" => Capability::Reject,
712        "ereject" => Capability::Ereject,
713        "extlists" => Capability::ExtLists,
714        "subaddress" => Capability::SubAddress,
715        "vacation" => Capability::Vacation,
716        "vacation-seconds" => Capability::VacationSeconds,
717        "fcc" => Capability::Fcc,
718        "mailbox" => Capability::Mailbox,
719        "mailboxid" => Capability::MailboxId,
720        "mboxmetadata" => Capability::MboxMetadata,
721        "servermetadata" => Capability::ServerMetadata,
722        "special-use" => Capability::SpecialUse,
723        "imap4flags" => Capability::Imap4Flags,
724        "ihave" => Capability::Ihave,
725        "imapsieve" => Capability::ImapSieve,
726        "include" => Capability::Include,
727        "regex" => Capability::Regex,
728        "spamtest" => Capability::SpamTest,
729        "spamtestplus" => Capability::SpamTestPlus,
730        "virustest" => Capability::VirusTest,
731
732        // Extensions
733        "vnd.stalwart.while" => Capability::While,
734        "vnd.stalwart.expressions" => Capability::Expressions,
735    )
736}