sieve/compiler/
mod.rs

1/*
2 * SPDX-FileCopyrightText: 2020 Stalwart Labs Ltd <hello@stalw.art>
3 *
4 * SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-SEL
5 */
6
7use self::{
8    grammar::{AddressPart, Capability},
9    lexer::tokenizer::TokenInfo,
10};
11use crate::{runtime::RuntimeError, Compiler, Envelope, FunctionMap};
12use ahash::AHashMap;
13use arc_swap::ArcSwap;
14use mail_parser::HeaderName;
15use std::{borrow::Cow, fmt::Display, sync::Arc};
16
17pub mod grammar;
18pub mod lexer;
19
20#[derive(Debug)]
21pub struct CompileError {
22    line_num: usize,
23    line_pos: usize,
24    error_type: ErrorType,
25}
26
27#[derive(Debug)]
28pub enum ErrorType {
29    InvalidCharacter(u8),
30    InvalidNumber(String),
31    InvalidMatchVariable(usize),
32    InvalidUnicodeSequence(u32),
33    InvalidNamespace(String),
34    InvalidRegex(String),
35    InvalidExpression(String),
36    InvalidUtf8String,
37    InvalidHeaderName,
38    InvalidArguments,
39    InvalidAddress,
40    InvalidURI,
41    InvalidEnvelope(String),
42    UnterminatedString,
43    UnterminatedComment,
44    UnterminatedMultiline,
45    UnterminatedBlock,
46    ScriptTooLong,
47    StringTooLong,
48    VariableTooLong,
49    VariableIsLocal(String),
50    HeaderTooLong,
51    ExpectedConstantString,
52    UnexpectedToken {
53        expected: Cow<'static, str>,
54        found: String,
55    },
56    UnexpectedEOF,
57    TooManyNestedBlocks,
58    TooManyNestedTests,
59    TooManyNestedForEveryParts,
60    TooManyIncludes,
61    LabelAlreadyDefined(String),
62    LabelUndefined(String),
63    BreakOutsideLoop,
64    ContinueOutsideLoop,
65    UnsupportedComparator(String),
66    DuplicatedParameter,
67    UndeclaredCapability(Capability),
68    MissingTag(Cow<'static, str>),
69}
70
71impl Default for Compiler {
72    fn default() -> Self {
73        Self::new()
74    }
75}
76
77#[derive(Debug, Clone, PartialEq, Eq)]
78#[cfg_attr(
79    any(test, feature = "serde"),
80    derive(serde::Serialize, serde::Deserialize)
81)]
82#[cfg_attr(
83    feature = "rkyv",
84    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
85)]
86#[cfg_attr(
87    feature = "rkyv",
88    rkyv(serialize_bounds(
89        __S: rkyv::ser::Writer + rkyv::ser::Allocator,
90        __S::Error: rkyv::rancor::Source,
91    ))
92)]
93#[cfg_attr(
94    feature = "rkyv",
95    rkyv(deserialize_bounds(__D::Error: rkyv::rancor::Source))
96)]
97#[cfg_attr(
98    feature = "rkyv",
99    rkyv(bytecheck(
100        bounds(
101            __C: rkyv::validation::ArchiveContext,
102        )
103    ))
104)]
105pub(crate) enum Value {
106    Text(Arc<String>),
107    Number(Number),
108    Variable(VariableType),
109    Regex(Regex),
110    List(#[cfg_attr(feature = "rkyv", rkyv(omit_bounds))] Vec<Value>),
111}
112
113#[derive(Debug, Clone)]
114#[cfg_attr(
115    feature = "rkyv",
116    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
117)]
118#[cfg_attr(
119    any(test, feature = "serde"),
120    derive(serde::Serialize, serde::Deserialize)
121)]
122pub struct Regex {
123    #[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::Skip))]
124    #[cfg_attr(any(test, feature = "serde"), serde(skip, default))]
125    pub regex: LazyRegex,
126    pub expr: String,
127}
128
129#[derive(Debug, Clone)]
130pub struct LazyRegex(pub Arc<ArcSwap<Option<fancy_regex::Regex>>>);
131
132#[derive(Debug, Clone, PartialEq, Eq)]
133#[cfg_attr(
134    any(test, feature = "serde"),
135    derive(serde::Serialize, serde::Deserialize)
136)]
137#[cfg_attr(
138    feature = "rkyv",
139    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
140)]
141pub enum VariableType {
142    Local(usize),
143    Match(usize),
144    Global(String),
145    Environment(String),
146    Envelope(Envelope),
147    Header(HeaderVariable),
148    Part(MessagePart),
149}
150
151#[derive(Debug, Clone, PartialEq, Eq)]
152#[cfg_attr(
153    any(test, feature = "serde"),
154    derive(serde::Serialize, serde::Deserialize)
155)]
156#[cfg_attr(
157    feature = "rkyv",
158    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
159)]
160pub struct Transform {
161    pub variable: Box<VariableType>,
162    pub functions: Vec<usize>,
163}
164
165#[derive(Debug, Clone, PartialEq, Eq)]
166#[cfg_attr(
167    any(test, feature = "serde"),
168    derive(serde::Serialize, serde::Deserialize)
169)]
170#[cfg_attr(
171    feature = "rkyv",
172    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
173)]
174pub struct HeaderVariable {
175    pub name: Vec<HeaderName<'static>>,
176    pub part: HeaderPart,
177    pub index_hdr: i32,
178    pub index_part: i32,
179}
180
/// Selects a part of the message.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(
    any(test, feature = "serde"),
    derive(serde::Serialize, serde::Deserialize)
)]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
)]
pub enum MessagePart {
    /// Text body.
    /// NOTE(review): the meaning of the flag is not visible here — confirm
    /// against the code that evaluates this variant.
    TextBody(bool),
    /// HTML body; the flag carries the same caveat as for `TextBody`.
    HtmlBody(bool),
    /// Contents.
    Contents,
    /// Raw form.
    Raw,
}
196
197#[derive(Debug, Clone, PartialEq, Eq)]
198#[cfg_attr(
199    any(test, feature = "serde"),
200    derive(serde::Serialize, serde::Deserialize)
201)]
202#[cfg_attr(
203    feature = "rkyv",
204    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
205)]
206pub enum HeaderPart {
207    Text,
208    Date,
209    Id,
210    Address(AddressPart),
211    ContentType(ContentTypePart),
212    Received(ReceivedPart),
213    Raw,
214    RawName,
215    Exists,
216}
217
/// Selects a component of a content type.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(
    any(test, feature = "serde"),
    derive(serde::Serialize, serde::Deserialize)
)]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
)]
pub enum ContentTypePart {
    /// The type.
    Type,
    /// The subtype.
    Subtype,
    /// An attribute, identified by name.
    Attribute(String),
}
232
233#[derive(Debug, Clone, PartialEq, Eq)]
234#[cfg_attr(
235    any(test, feature = "serde"),
236    derive(serde::Serialize, serde::Deserialize)
237)]
238#[cfg_attr(
239    feature = "rkyv",
240    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
241)]
242pub enum ReceivedPart {
243    From(ReceivedHostname),
244    FromIp,
245    FromIpRev,
246    By(ReceivedHostname),
247    For,
248    With,
249    TlsVersion,
250    TlsCipher,
251    Id,
252    Ident,
253    Via,
254    Date,
255    DateRaw,
256}
257
/// Narrows the host selected by [`ReceivedPart::From`] or
/// [`ReceivedPart::By`].
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(
    any(test, feature = "serde"),
    derive(serde::Serialize, serde::Deserialize)
)]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
)]
pub enum ReceivedHostname {
    /// Name form.
    Name,
    /// IP form.
    Ip,
    /// Either form.
    Any,
}
272
/// A numeric value: either a 64-bit signed integer or a 64-bit float.
#[derive(Debug, Clone, Copy)]
#[cfg_attr(
    any(test, feature = "serde"),
    derive(serde::Serialize, serde::Deserialize)
)]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
)]
pub enum Number {
    /// Whole number.
    Integer(i64),
    /// Floating-point number.
    Float(f64),
}
286
287impl Number {
288    #[cfg(test)]
289    pub fn to_float(&self) -> f64 {
290        match self {
291            Number::Integer(i) => *i as f64,
292            Number::Float(fl) => *fl,
293        }
294    }
295}
296
297impl From<Number> for usize {
298    fn from(value: Number) -> Self {
299        match value {
300            Number::Integer(i) => i as usize,
301            Number::Float(fl) => fl as usize,
302        }
303    }
304}
305
306impl Display for Number {
307    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
308        match self {
309            Number::Integer(i) => i.fmt(f),
310            Number::Float(fl) => fl.fmt(f),
311        }
312    }
313}
314
315impl Compiler {
316    pub const VERSION: u32 = 2;
317
318    pub fn new() -> Self {
319        Compiler {
320            max_script_size: 1024 * 1024,
321            max_string_size: 4096,
322            max_variable_name_size: 32,
323            max_nested_blocks: 15,
324            max_nested_tests: 15,
325            max_nested_foreverypart: 3,
326            max_match_variables: 30,
327            max_local_variables: 128,
328            max_header_size: 1024,
329            max_includes: 6,
330            functions: AHashMap::new(),
331            no_capability_check: false,
332        }
333    }
334
335    pub fn set_max_header_size(&mut self, size: usize) {
336        self.max_header_size = size;
337    }
338
339    pub fn with_max_header_size(mut self, size: usize) -> Self {
340        self.max_header_size = size;
341        self
342    }
343
344    pub fn set_max_includes(&mut self, size: usize) {
345        self.max_includes = size;
346    }
347
348    pub fn with_max_includes(mut self, size: usize) -> Self {
349        self.max_includes = size;
350        self
351    }
352
353    pub fn set_max_nested_blocks(&mut self, size: usize) {
354        self.max_nested_blocks = size;
355    }
356
357    pub fn with_max_nested_blocks(mut self, size: usize) -> Self {
358        self.max_nested_blocks = size;
359        self
360    }
361
362    pub fn set_max_nested_tests(&mut self, size: usize) {
363        self.max_nested_tests = size;
364    }
365
366    pub fn with_max_nested_tests(mut self, size: usize) -> Self {
367        self.max_nested_tests = size;
368        self
369    }
370
371    pub fn set_max_nested_foreverypart(&mut self, size: usize) {
372        self.max_nested_foreverypart = size;
373    }
374
375    pub fn with_max_nested_foreverypart(mut self, size: usize) -> Self {
376        self.max_nested_foreverypart = size;
377        self
378    }
379
380    pub fn set_max_script_size(&mut self, size: usize) {
381        self.max_script_size = size;
382    }
383
384    pub fn with_max_script_size(mut self, size: usize) -> Self {
385        self.max_script_size = size;
386        self
387    }
388
389    pub fn set_max_string_size(&mut self, size: usize) {
390        self.max_string_size = size;
391    }
392
393    pub fn with_max_string_size(mut self, size: usize) -> Self {
394        self.max_string_size = size;
395        self
396    }
397
398    pub fn set_max_variable_name_size(&mut self, size: usize) {
399        self.max_variable_name_size = size;
400    }
401
402    pub fn with_max_variable_name_size(mut self, size: usize) -> Self {
403        self.max_variable_name_size = size;
404        self
405    }
406
407    pub fn set_max_match_variables(&mut self, size: usize) {
408        self.max_match_variables = size;
409    }
410
411    pub fn with_max_match_variables(mut self, size: usize) -> Self {
412        self.max_match_variables = size;
413        self
414    }
415
416    pub fn set_max_local_variables(&mut self, size: usize) {
417        self.max_local_variables = size;
418    }
419
420    pub fn with_max_local_variables(mut self, size: usize) -> Self {
421        self.max_local_variables = size;
422        self
423    }
424
425    pub fn register_functions(mut self, fnc_map: &mut FunctionMap) -> Self {
426        self.functions = std::mem::take(&mut fnc_map.map);
427        self
428    }
429
430    pub fn with_no_capability_check(mut self, value: bool) -> Self {
431        self.no_capability_check = value;
432        self
433    }
434
435    pub fn set_no_capability_check(&mut self, value: bool) {
436        self.no_capability_check = value;
437    }
438}
439
440impl CompileError {
441    pub fn line_num(&self) -> usize {
442        self.line_num
443    }
444
445    pub fn line_pos(&self) -> usize {
446        self.line_pos
447    }
448
449    pub fn error_type(&self) -> &ErrorType {
450        &self.error_type
451    }
452}
453
454impl PartialEq for Regex {
455    fn eq(&self, other: &Self) -> bool {
456        self.expr == other.expr
457    }
458}
459
460impl Eq for Regex {}
461
462impl TokenInfo {
463    pub fn expected(self, expected: impl Into<Cow<'static, str>>) -> CompileError {
464        CompileError {
465            line_num: self.line_num,
466            line_pos: self.line_pos,
467            error_type: ErrorType::UnexpectedToken {
468                expected: expected.into(),
469                found: self.token.to_string(),
470            },
471        }
472    }
473
474    pub fn missing_tag(self, tag: impl Into<Cow<'static, str>>) -> CompileError {
475        CompileError {
476            line_num: self.line_num,
477            line_pos: self.line_pos,
478            error_type: ErrorType::MissingTag(tag.into()),
479        }
480    }
481
482    pub fn custom(self, error_type: ErrorType) -> CompileError {
483        CompileError {
484            line_num: self.line_num,
485            line_pos: self.line_pos,
486            error_type,
487        }
488    }
489}
490
491impl Default for LazyRegex {
492    fn default() -> Self {
493        Self(Arc::new(ArcSwap::new(Arc::new(None))))
494    }
495}
496
497impl Regex {
498    pub fn new(expr: String, regex: fancy_regex::Regex) -> Self {
499        Self {
500            expr,
501            regex: LazyRegex(Arc::new(ArcSwap::new(Arc::new(Some(regex))))),
502        }
503    }
504}
505
506impl Display for CompileError {
507    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
508        match &self.error_type() {
509            ErrorType::InvalidCharacter(value) => {
510                write!(f, "Invalid character {:?}", char::from(*value))
511            }
512            ErrorType::InvalidNumber(value) => write!(f, "Invalid number {value:?}"),
513            ErrorType::InvalidMatchVariable(value) => {
514                write!(f, "Match variable {value} out of range")
515            }
516            ErrorType::InvalidUnicodeSequence(value) => {
517                write!(f, "Invalid Unicode sequence {value:04x}")
518            }
519            ErrorType::InvalidNamespace(value) => write!(f, "Invalid namespace {value:?}"),
520            ErrorType::InvalidRegex(value) => write!(f, "Invalid regular expression {value:?}"),
521            ErrorType::InvalidExpression(value) => write!(f, "Invalid expression {value}"),
522            ErrorType::InvalidUtf8String => write!(f, "Invalid UTF-8 string"),
523            ErrorType::InvalidHeaderName => write!(f, "Invalid header name"),
524            ErrorType::InvalidArguments => write!(f, "Invalid Arguments"),
525            ErrorType::InvalidAddress => write!(f, "Invalid Address"),
526            ErrorType::InvalidURI => write!(f, "Invalid URI"),
527            ErrorType::InvalidEnvelope(value) => write!(f, "Invalid envelope {value:?}"),
528            ErrorType::UnterminatedString => write!(f, "Unterminated string"),
529            ErrorType::UnterminatedComment => write!(f, "Unterminated comment"),
530            ErrorType::UnterminatedMultiline => write!(f, "Unterminated multi-line string"),
531            ErrorType::UnterminatedBlock => write!(f, "Unterminated block"),
532            ErrorType::ScriptTooLong => write!(f, "Sieve script is too large"),
533            ErrorType::StringTooLong => write!(f, "String is too long"),
534            ErrorType::VariableTooLong => write!(f, "Variable name is too long"),
535            ErrorType::VariableIsLocal(value) => {
536                write!(f, "Variable {value:?} was already defined as local")
537            }
538            ErrorType::HeaderTooLong => write!(f, "Header value is too long"),
539            ErrorType::ExpectedConstantString => write!(f, "Expected a constant string"),
540            ErrorType::UnexpectedToken { expected, found } => {
541                write!(f, "Expected token {expected:?} but found {found:?}")
542            }
543            ErrorType::UnexpectedEOF => write!(f, "Unexpected end of file"),
544            ErrorType::TooManyNestedBlocks => write!(f, "Too many nested blocks"),
545            ErrorType::TooManyNestedTests => write!(f, "Too many nested tests"),
546            ErrorType::TooManyNestedForEveryParts => {
547                write!(f, "Too many nested foreverypart blocks")
548            }
549            ErrorType::TooManyIncludes => write!(f, "Too many includes"),
550            ErrorType::LabelAlreadyDefined(value) => write!(f, "Label {value:?} already defined"),
551            ErrorType::LabelUndefined(value) => write!(f, "Label {value:?} does not exist"),
552            ErrorType::BreakOutsideLoop => write!(f, "Break used outside of foreverypart loop"),
553            ErrorType::ContinueOutsideLoop => write!(f, "Continue used outside of while loop"),
554            ErrorType::UnsupportedComparator(value) => {
555                write!(f, "Comparator {value:?} is not supported")
556            }
557            ErrorType::DuplicatedParameter => write!(f, "Duplicated argument"),
558            ErrorType::UndeclaredCapability(value) => {
559                write!(f, "Undeclared capability '{value}'")
560            }
561            ErrorType::MissingTag(value) => write!(f, "Missing tag {value:?}"),
562        }?;
563
564        write!(
565            f,
566            " at line {}, column {}.",
567            self.line_num(),
568            self.line_pos()
569        )
570    }
571}
572
573impl Display for RuntimeError {
574    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
575        match self {
576            RuntimeError::TooManyIncludes => write!(f, ""),
577            RuntimeError::InvalidInstruction(value) => write!(
578                f,
579                "Script executed invalid instruction {:?} at line {}, column {}.",
580                value.name(),
581                value.line_pos(),
582                value.line_num()
583            ),
584            RuntimeError::ScriptErrorMessage(value) => {
585                write!(f, "Script reported error {value:?}.")
586            }
587            RuntimeError::CapabilityNotAllowed(value) => {
588                write!(f, "Capability '{value}' has been disabled.")
589            }
590            RuntimeError::CapabilityNotSupported(value) => {
591                write!(f, "Capability '{value}' not supported.")
592            }
593            RuntimeError::CPULimitReached => write!(
594                f,
595                "Script exceeded the maximum number of instructions allowed to execute."
596            ),
597        }
598    }
599}
600
#[cfg(test)]
mod tests {
    use crate::Compiler;
    use std::{fs, path::PathBuf};

    /// Compiles every `.sieve` script found in `tests/rfcs` and compares the
    /// pretty-printed JSON of its instructions with the `.json` file of the
    /// same name. On a mismatch the actual output is written to a `.failed`
    /// file next to the fixture and the test panics.
    #[test]
    fn parse_rfc() {
        let test_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
            .join("tests")
            .join("rfcs");
        let compiler = Compiler::new().with_max_nested_foreverypart(10);
        let mut tests_run = 0;

        for entry in fs::read_dir(&test_dir).unwrap() {
            let mut path = entry.unwrap().path();
            // Only `.sieve` files are test inputs; skip everything else.
            if !path.extension().is_some_and(|ext| ext == "sieve") {
                continue;
            }
            println!("Parsing {}", path.display());

            let script = fs::read(&path).unwrap();
            path.set_extension("json");
            let expected_result = fs::read(&path).unwrap();

            tests_run += 1;

            let sieve = compiler.compile(&script).unwrap();
            let numbered_instructions = sieve
                .instructions
                .into_iter()
                .enumerate()
                .collect::<Vec<_>>();
            let json_sieve = serde_json::to_string_pretty(&numbered_instructions).unwrap();

            if json_sieve.as_bytes() != expected_result {
                // Keep the actual output around so it can be compared with the fixture.
                path.set_extension("failed");
                fs::write(&path, json_sieve.as_bytes()).unwrap();
                panic!("Test failed, parsed sieve saved to {}", path.display());
            }
        }

        assert!(
            tests_run > 0,
            "Did not find any tests to run in folder {}.",
            test_dir.display()
        );
    }
}
660        );
661    }
662}