sieve/compiler/
mod.rs

1/*
2 * Copyright (c) 2020-2023, Stalwart Labs Ltd.
3 *
4 * This file is part of the Stalwart Sieve Interpreter.
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU Affero General Public License as
8 * published by the Free Software Foundation, either version 3 of
9 * the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Affero General Public License for more details.
15 * in the LICENSE file at the top-level directory of this distribution.
16 * You should have received a copy of the GNU Affero General Public License
17 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
18 *
19 * You can be released from the requirements of the AGPLv3 license by
20 * purchasing a commercial license. Please contact licensing@stalw.art
21 * for more details.
22*/
23
24use std::{borrow::Cow, fmt::Display, sync::Arc};
25
26use ahash::AHashMap;
27use mail_parser::HeaderName;
28use serde::{Deserialize, Deserializer, Serialize, Serializer};
29
30use crate::{runtime::RuntimeError, Compiler, Envelope, FunctionMap};
31
32use self::{
33    grammar::{AddressPart, Capability},
34    lexer::tokenizer::TokenInfo,
35};
36
37pub mod grammar;
38pub mod lexer;
39
40#[derive(Debug)]
41pub struct CompileError {
42    line_num: usize,
43    line_pos: usize,
44    error_type: ErrorType,
45}
46
47#[derive(Debug)]
48pub enum ErrorType {
49    InvalidCharacter(u8),
50    InvalidNumber(String),
51    InvalidMatchVariable(usize),
52    InvalidUnicodeSequence(u32),
53    InvalidNamespace(String),
54    InvalidRegex(String),
55    InvalidExpression(String),
56    InvalidUtf8String,
57    InvalidHeaderName,
58    InvalidArguments,
59    InvalidAddress,
60    InvalidURI,
61    InvalidEnvelope(String),
62    UnterminatedString,
63    UnterminatedComment,
64    UnterminatedMultiline,
65    UnterminatedBlock,
66    ScriptTooLong,
67    StringTooLong,
68    VariableTooLong,
69    VariableIsLocal(String),
70    HeaderTooLong,
71    ExpectedConstantString,
72    UnexpectedToken {
73        expected: Cow<'static, str>,
74        found: String,
75    },
76    UnexpectedEOF,
77    TooManyNestedBlocks,
78    TooManyNestedTests,
79    TooManyNestedForEveryParts,
80    TooManyIncludes,
81    LabelAlreadyDefined(String),
82    LabelUndefined(String),
83    BreakOutsideLoop,
84    ContinueOutsideLoop,
85    UnsupportedComparator(String),
86    DuplicatedParameter,
87    UndeclaredCapability(Capability),
88    MissingTag(Cow<'static, str>),
89}
90
91impl Default for Compiler {
92    fn default() -> Self {
93        Self::new()
94    }
95}
96
97#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
98pub enum Value {
99    Text(Arc<String>),
100    Number(Number),
101    Variable(VariableType),
102    Regex(Regex),
103    List(Vec<Value>),
104}
105
106#[derive(Debug, Clone)]
107pub struct Regex {
108    pub regex: fancy_regex::Regex,
109    pub expr: String,
110}
111
112#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
113pub enum VariableType {
114    Local(usize),
115    Match(usize),
116    Global(String),
117    Environment(String),
118    Envelope(Envelope),
119    Header(HeaderVariable),
120    Part(MessagePart),
121}
122
123#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
124pub struct Transform {
125    pub variable: Box<VariableType>,
126    pub functions: Vec<usize>,
127}
128
129#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
130pub struct HeaderVariable {
131    pub name: Vec<HeaderName<'static>>,
132    pub part: HeaderPart,
133    pub index_hdr: i32,
134    pub index_part: i32,
135}
136
137#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
138pub enum MessagePart {
139    TextBody(bool),
140    HtmlBody(bool),
141    Contents,
142    Raw,
143}
144
145#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
146pub enum HeaderPart {
147    Text,
148    Date,
149    Id,
150    Address(AddressPart),
151    ContentType(ContentTypePart),
152    Received(ReceivedPart),
153    Raw,
154    RawName,
155    Exists,
156}
157
158#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
159pub enum ContentTypePart {
160    Type,
161    Subtype,
162    Attribute(String),
163}
164
165#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
166pub enum ReceivedPart {
167    From(ReceivedHostname),
168    FromIp,
169    FromIpRev,
170    By(ReceivedHostname),
171    For,
172    With,
173    TlsVersion,
174    TlsCipher,
175    Id,
176    Ident,
177    Via,
178    Date,
179    DateRaw,
180}
181
182#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
183pub enum ReceivedHostname {
184    Name,
185    Ip,
186    Any,
187}
188
189#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
190pub enum Number {
191    Integer(i64),
192    Float(f64),
193}
194
195impl Number {
196    #[cfg(test)]
197    pub fn to_float(&self) -> f64 {
198        match self {
199            Number::Integer(i) => *i as f64,
200            Number::Float(fl) => *fl,
201        }
202    }
203}
204
205impl From<Number> for usize {
206    fn from(value: Number) -> Self {
207        match value {
208            Number::Integer(i) => i as usize,
209            Number::Float(fl) => fl as usize,
210        }
211    }
212}
213
214impl Display for Number {
215    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
216        match self {
217            Number::Integer(i) => i.fmt(f),
218            Number::Float(fl) => fl.fmt(f),
219        }
220    }
221}
222
223impl Compiler {
224    pub const VERSION: u32 = 2;
225
226    pub fn new() -> Self {
227        Compiler {
228            max_script_size: 1024 * 1024,
229            max_string_size: 4096,
230            max_variable_name_size: 32,
231            max_nested_blocks: 15,
232            max_nested_tests: 15,
233            max_nested_foreverypart: 3,
234            max_match_variables: 30,
235            max_local_variables: 128,
236            max_header_size: 1024,
237            max_includes: 6,
238            functions: AHashMap::new(),
239            no_capability_check: false,
240        }
241    }
242
243    pub fn set_max_header_size(&mut self, size: usize) {
244        self.max_header_size = size;
245    }
246
247    pub fn with_max_header_size(mut self, size: usize) -> Self {
248        self.max_header_size = size;
249        self
250    }
251
252    pub fn set_max_includes(&mut self, size: usize) {
253        self.max_includes = size;
254    }
255
256    pub fn with_max_includes(mut self, size: usize) -> Self {
257        self.max_includes = size;
258        self
259    }
260
261    pub fn set_max_nested_blocks(&mut self, size: usize) {
262        self.max_nested_blocks = size;
263    }
264
265    pub fn with_max_nested_blocks(mut self, size: usize) -> Self {
266        self.max_nested_blocks = size;
267        self
268    }
269
270    pub fn set_max_nested_tests(&mut self, size: usize) {
271        self.max_nested_tests = size;
272    }
273
274    pub fn with_max_nested_tests(mut self, size: usize) -> Self {
275        self.max_nested_tests = size;
276        self
277    }
278
279    pub fn set_max_nested_foreverypart(&mut self, size: usize) {
280        self.max_nested_foreverypart = size;
281    }
282
283    pub fn with_max_nested_foreverypart(mut self, size: usize) -> Self {
284        self.max_nested_foreverypart = size;
285        self
286    }
287
288    pub fn set_max_script_size(&mut self, size: usize) {
289        self.max_script_size = size;
290    }
291
292    pub fn with_max_script_size(mut self, size: usize) -> Self {
293        self.max_script_size = size;
294        self
295    }
296
297    pub fn set_max_string_size(&mut self, size: usize) {
298        self.max_string_size = size;
299    }
300
301    pub fn with_max_string_size(mut self, size: usize) -> Self {
302        self.max_string_size = size;
303        self
304    }
305
306    pub fn set_max_variable_name_size(&mut self, size: usize) {
307        self.max_variable_name_size = size;
308    }
309
310    pub fn with_max_variable_name_size(mut self, size: usize) -> Self {
311        self.max_variable_name_size = size;
312        self
313    }
314
315    pub fn set_max_match_variables(&mut self, size: usize) {
316        self.max_match_variables = size;
317    }
318
319    pub fn with_max_match_variables(mut self, size: usize) -> Self {
320        self.max_match_variables = size;
321        self
322    }
323
324    pub fn set_max_local_variables(&mut self, size: usize) {
325        self.max_local_variables = size;
326    }
327
328    pub fn with_max_local_variables(mut self, size: usize) -> Self {
329        self.max_local_variables = size;
330        self
331    }
332
333    pub fn register_functions(mut self, fnc_map: &mut FunctionMap) -> Self {
334        self.functions = std::mem::take(&mut fnc_map.map);
335        self
336    }
337
338    pub fn with_no_capability_check(mut self, value: bool) -> Self {
339        self.no_capability_check = value;
340        self
341    }
342
343    pub fn set_no_capability_check(&mut self, value: bool) {
344        self.no_capability_check = value;
345    }
346}
347
348impl CompileError {
349    pub fn line_num(&self) -> usize {
350        self.line_num
351    }
352
353    pub fn line_pos(&self) -> usize {
354        self.line_pos
355    }
356
357    pub fn error_type(&self) -> &ErrorType {
358        &self.error_type
359    }
360}
361
362impl PartialEq for Regex {
363    fn eq(&self, other: &Self) -> bool {
364        self.expr == other.expr
365    }
366}
367
368impl Eq for Regex {}
369
370impl Serialize for Regex {
371    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
372    where
373        S: Serializer,
374    {
375        self.expr.serialize(serializer)
376    }
377}
378
379impl<'de> Deserialize<'de> for Regex {
380    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
381    where
382        D: Deserializer<'de>,
383    {
384        <String>::deserialize(deserializer).and_then(|expr| {
385            fancy_regex::Regex::new(&expr)
386                .map(|regex| Regex { regex, expr })
387                .map_err(|err| serde::de::Error::custom(err.to_string()))
388        })
389    }
390}
391
392impl TokenInfo {
393    pub fn expected(self, expected: impl Into<Cow<'static, str>>) -> CompileError {
394        CompileError {
395            line_num: self.line_num,
396            line_pos: self.line_pos,
397            error_type: ErrorType::UnexpectedToken {
398                expected: expected.into(),
399                found: self.token.to_string(),
400            },
401        }
402    }
403
404    pub fn missing_tag(self, tag: impl Into<Cow<'static, str>>) -> CompileError {
405        CompileError {
406            line_num: self.line_num,
407            line_pos: self.line_pos,
408            error_type: ErrorType::MissingTag(tag.into()),
409        }
410    }
411
412    pub fn custom(self, error_type: ErrorType) -> CompileError {
413        CompileError {
414            line_num: self.line_num,
415            line_pos: self.line_pos,
416            error_type,
417        }
418    }
419}
420
421impl Display for CompileError {
422    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
423        match &self.error_type() {
424            ErrorType::InvalidCharacter(value) => {
425                write!(f, "Invalid character {:?}", char::from(*value))
426            }
427            ErrorType::InvalidNumber(value) => write!(f, "Invalid number {value:?}"),
428            ErrorType::InvalidMatchVariable(value) => {
429                write!(f, "Match variable {value} out of range")
430            }
431            ErrorType::InvalidUnicodeSequence(value) => {
432                write!(f, "Invalid Unicode sequence {value:04x}")
433            }
434            ErrorType::InvalidNamespace(value) => write!(f, "Invalid namespace {value:?}"),
435            ErrorType::InvalidRegex(value) => write!(f, "Invalid regular expression {value:?}"),
436            ErrorType::InvalidExpression(value) => write!(f, "Invalid expression {value}"),
437            ErrorType::InvalidUtf8String => write!(f, "Invalid UTF-8 string"),
438            ErrorType::InvalidHeaderName => write!(f, "Invalid header name"),
439            ErrorType::InvalidArguments => write!(f, "Invalid Arguments"),
440            ErrorType::InvalidAddress => write!(f, "Invalid Address"),
441            ErrorType::InvalidURI => write!(f, "Invalid URI"),
442            ErrorType::InvalidEnvelope(value) => write!(f, "Invalid envelope {value:?}"),
443            ErrorType::UnterminatedString => write!(f, "Unterminated string"),
444            ErrorType::UnterminatedComment => write!(f, "Unterminated comment"),
445            ErrorType::UnterminatedMultiline => write!(f, "Unterminated multi-line string"),
446            ErrorType::UnterminatedBlock => write!(f, "Unterminated block"),
447            ErrorType::ScriptTooLong => write!(f, "Sieve script is too large"),
448            ErrorType::StringTooLong => write!(f, "String is too long"),
449            ErrorType::VariableTooLong => write!(f, "Variable name is too long"),
450            ErrorType::VariableIsLocal(value) => {
451                write!(f, "Variable {value:?} was already defined as local")
452            }
453            ErrorType::HeaderTooLong => write!(f, "Header value is too long"),
454            ErrorType::ExpectedConstantString => write!(f, "Expected a constant string"),
455            ErrorType::UnexpectedToken { expected, found } => {
456                write!(f, "Expected token {expected:?} but found {found:?}")
457            }
458            ErrorType::UnexpectedEOF => write!(f, "Unexpected end of file"),
459            ErrorType::TooManyNestedBlocks => write!(f, "Too many nested blocks"),
460            ErrorType::TooManyNestedTests => write!(f, "Too many nested tests"),
461            ErrorType::TooManyNestedForEveryParts => {
462                write!(f, "Too many nested foreverypart blocks")
463            }
464            ErrorType::TooManyIncludes => write!(f, "Too many includes"),
465            ErrorType::LabelAlreadyDefined(value) => write!(f, "Label {value:?} already defined"),
466            ErrorType::LabelUndefined(value) => write!(f, "Label {value:?} does not exist"),
467            ErrorType::BreakOutsideLoop => write!(f, "Break used outside of foreverypart loop"),
468            ErrorType::ContinueOutsideLoop => write!(f, "Continue used outside of while loop"),
469            ErrorType::UnsupportedComparator(value) => {
470                write!(f, "Comparator {value:?} is not supported")
471            }
472            ErrorType::DuplicatedParameter => write!(f, "Duplicated argument"),
473            ErrorType::UndeclaredCapability(value) => {
474                write!(f, "Undeclared capability '{value}'")
475            }
476            ErrorType::MissingTag(value) => write!(f, "Missing tag {value:?}"),
477        }?;
478
479        write!(
480            f,
481            " at line {}, column {}.",
482            self.line_num(),
483            self.line_pos()
484        )
485    }
486}
487
488impl Display for RuntimeError {
489    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
490        match self {
491            RuntimeError::TooManyIncludes => write!(f, ""),
492            RuntimeError::InvalidInstruction(value) => write!(
493                f,
494                "Script executed invalid instruction {:?} at line {}, column {}.",
495                value.name(),
496                value.line_pos(),
497                value.line_num()
498            ),
499            RuntimeError::ScriptErrorMessage(value) => {
500                write!(f, "Script reported error {value:?}.")
501            }
502            RuntimeError::CapabilityNotAllowed(value) => {
503                write!(f, "Capability '{value}' has been disabled.")
504            }
505            RuntimeError::CapabilityNotSupported(value) => {
506                write!(f, "Capability '{value}' not supported.")
507            }
508            RuntimeError::CPULimitReached => write!(
509                f,
510                "Script exceeded the maximum number of instructions allowed to execute."
511            ),
512        }
513    }
514}
515
#[cfg(test)]
mod tests {
    use std::{fs, path::PathBuf};

    use crate::Compiler;

    /// Compiles every `*.sieve` file under `tests/rfcs` and compares the
    /// pretty-printed JSON of its numbered instructions with the sibling
    /// `*.json` file.
    ///
    /// On a mismatch the generated JSON is saved next to the script with a
    /// `.failed` extension before the test panics. The test also fails when
    /// the directory contains no scripts at all.
    #[test]
    fn parse_rfc() {
        let test_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
            .join("tests")
            .join("rfcs");
        let compiler = Compiler::new().with_max_nested_foreverypart(10);
        let mut tests_run = 0;

        for entry in fs::read_dir(&test_dir).unwrap() {
            let script_path = entry.unwrap().path();

            // Only Sieve scripts are test inputs; skip everything else.
            match script_path.extension() {
                Some(ext) if ext == "sieve" => {}
                _ => continue,
            }
            println!("Parsing {}", script_path.display());

            let script = fs::read(&script_path).unwrap();
            let expected_result = fs::read(script_path.with_extension("json")).unwrap();

            tests_run += 1;

            let sieve = compiler.compile(&script).unwrap();
            let numbered_instructions = sieve
                .instructions
                .into_iter()
                .enumerate()
                .collect::<Vec<_>>();
            let json_sieve = serde_json::to_string_pretty(&numbered_instructions).unwrap();

            if json_sieve.as_bytes() != expected_result {
                // Keep the actual output around so it can be diffed by hand.
                let failed_path = script_path.with_extension("failed");
                fs::write(&failed_path, json_sieve.as_bytes()).unwrap();
                panic!(
                    "Test failed, parsed sieve saved to {}",
                    failed_path.display()
                );
            }
        }

        assert!(
            tests_run > 0,
            "Did not find any tests to run in folder {}.",
            test_dir.display()
        );
    }
}
575        );
576    }
577}