1use self::{
8 grammar::{AddressPart, Capability},
9 lexer::tokenizer::TokenInfo,
10};
11use crate::{runtime::RuntimeError, Compiler, Envelope, FunctionMap};
12use ahash::AHashMap;
13use arc_swap::ArcSwap;
14use mail_parser::HeaderName;
15use std::{borrow::Cow, fmt::Display, sync::Arc};
16
17pub mod grammar;
18pub mod lexer;
19
/// Error raised while compiling a script: an [`ErrorType`] together with the
/// line and column at which it was reported.
///
/// The `Display` implementation renders the error message followed by
/// `" at line <line_num>, column <line_pos>."`.
#[derive(Debug)]
pub struct CompileError {
    /// Line number reported for the error.
    line_num: usize,
    /// Column (position within the line) reported for the error.
    line_pos: usize,
    /// The specific kind of failure.
    error_type: ErrorType,
}
26
/// Kinds of failure carried by a [`CompileError`].
///
/// The user-facing text for every variant is produced by the `Display`
/// implementation of [`CompileError`]; payloads, where present, are echoed in
/// that message.
#[derive(Debug)]
pub enum ErrorType {
    /// An unexpected byte; displayed as a `char`.
    InvalidCharacter(u8),
    InvalidNumber(String),
    /// A match variable number that is out of range.
    InvalidMatchVariable(usize),
    /// An invalid Unicode sequence; displayed as zero-padded hexadecimal.
    InvalidUnicodeSequence(u32),
    InvalidNamespace(String),
    InvalidRegex(String),
    InvalidExpression(String),
    InvalidUtf8String,
    InvalidHeaderName,
    InvalidArguments,
    InvalidAddress,
    InvalidURI,
    InvalidEnvelope(String),
    UnterminatedString,
    UnterminatedComment,
    /// A multi-line string that was never terminated.
    UnterminatedMultiline,
    UnterminatedBlock,
    ScriptTooLong,
    StringTooLong,
    /// A variable name that is too long.
    VariableTooLong,
    /// The named variable was already defined as local.
    VariableIsLocal(String),
    /// A header value that is too long.
    HeaderTooLong,
    ExpectedConstantString,
    /// A token other than the expected one was encountered.
    UnexpectedToken {
        /// Description of what was expected.
        expected: Cow<'static, str>,
        /// Text of the token that was actually found.
        found: String,
    },
    UnexpectedEOF,
    TooManyNestedBlocks,
    TooManyNestedTests,
    TooManyNestedForEveryParts,
    TooManyIncludes,
    LabelAlreadyDefined(String),
    LabelUndefined(String),
    BreakOutsideLoop,
    ContinueOutsideLoop,
    UnsupportedComparator(String),
    /// Displayed as "Duplicated argument".
    DuplicatedParameter,
    /// A capability that was used without being declared.
    UndeclaredCapability(Capability),
    /// A required tag is missing; the payload names the tag.
    MissingTag(Cow<'static, str>),
}
70
71impl Default for Compiler {
72 fn default() -> Self {
73 Self::new()
74 }
75}
76
/// Crate-internal value representation: text, a number, a variable reference,
/// a regular expression, or a (possibly nested) list of values.
///
/// Serde support is compiled in for tests or with the `serde` feature; rkyv
/// support is compiled in with the `rkyv` feature.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(
    any(test, feature = "serde"),
    derive(serde::Serialize, serde::Deserialize)
)]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
)]
#[cfg_attr(
    feature = "rkyv",
    rkyv(serialize_bounds(
        __S: rkyv::ser::Writer + rkyv::ser::Allocator,
        __S::Error: rkyv::rancor::Source,
    ))
)]
#[cfg_attr(
    feature = "rkyv",
    rkyv(deserialize_bounds(__D::Error: rkyv::rancor::Source))
)]
#[cfg_attr(
    feature = "rkyv",
    rkyv(bytecheck(
        bounds(
            __C: rkyv::validation::ArchiveContext,
        )
    ))
)]
pub(crate) enum Value {
    /// Reference-counted string.
    Text(Arc<String>),
    /// Integer or floating-point number.
    Number(Number),
    /// Reference to a variable of the given kind.
    Variable(VariableType),
    /// Regular expression (see [`Regex`]).
    Regex(Regex),
    /// List of values. The rkyv bounds are omitted on this field, presumably
    /// because the type is recursive.
    List(#[cfg_attr(feature = "rkyv", rkyv(omit_bounds))] Vec<Value>),
}
112
/// Regular expression: the source expression plus a slot for its compiled form.
///
/// Only `expr` is serialized; the compiled `regex` slot is skipped by both
/// serde and rkyv.
#[derive(Debug, Clone)]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
)]
#[cfg_attr(
    any(test, feature = "serde"),
    derive(serde::Serialize, serde::Deserialize)
)]
pub struct Regex {
    /// Slot for the compiled regex; skipped during (de)serialization and
    /// filled with its `Default` value when deserialized with serde.
    #[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::Skip))]
    #[cfg_attr(any(test, feature = "serde"), serde(skip, default))]
    pub regex: LazyRegex,
    /// Source text of the expression.
    pub expr: String,
}
128
/// Shared, swappable slot holding an optional compiled `fancy_regex::Regex`.
///
/// NOTE(review): the name suggests the regex is compiled on demand when the
/// slot is empty; the code that does so is not in this file.
#[derive(Debug, Clone)]
pub struct LazyRegex(pub Arc<ArcSwap<Option<fancy_regex::Regex>>>);
131
/// Kinds of variable that a value can refer to.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(
    any(test, feature = "serde"),
    derive(serde::Serialize, serde::Deserialize)
)]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
)]
pub enum VariableType {
    /// Local variable identified by a number (presumably a slot index; confirm
    /// against the runtime).
    Local(usize),
    /// Match variable identified by a number.
    Match(usize),
    /// Global variable (presumably keyed by name; confirm against the runtime).
    Global(String),
    /// Environment item (presumably keyed by name; confirm against the runtime).
    Environment(String),
    /// Envelope part.
    Envelope(Envelope),
    /// Header lookup described by a [`HeaderVariable`].
    Header(HeaderVariable),
    /// Message part selected by a [`MessagePart`].
    Part(MessagePart),
}
150
/// A variable paired with a list of function identifiers.
///
/// NOTE(review): presumably the functions are applied to the variable's value
/// in order; the code that interprets this type is not in this file.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(
    any(test, feature = "serde"),
    derive(serde::Serialize, serde::Deserialize)
)]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
)]
pub struct Transform {
    /// The variable being referenced.
    pub variable: Box<VariableType>,
    /// Numeric function identifiers (presumably ids of registered functions;
    /// TODO confirm).
    pub functions: Vec<usize>,
}
164
/// Describes a header lookup: which header names, which part of the header,
/// and two signed indices.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(
    any(test, feature = "serde"),
    derive(serde::Serialize, serde::Deserialize)
)]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
)]
pub struct HeaderVariable {
    /// Header names to look up.
    pub name: Vec<HeaderName<'static>>,
    /// Part of the header to extract.
    pub part: HeaderPart,
    /// Header index. NOTE(review): presumably selects among repeated headers,
    /// with the sign choosing the counting direction; confirm against the
    /// runtime.
    pub index_hdr: i32,
    /// Part index. NOTE(review): presumably selects among multiple values
    /// within a header; confirm against the runtime.
    pub index_part: i32,
}
180
/// Selects a part of a message.
///
/// NOTE(review): the meaning of the `bool` carried by `TextBody` and
/// `HtmlBody` is not visible in this file; confirm against the code that
/// evaluates it.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(
    any(test, feature = "serde"),
    derive(serde::Serialize, serde::Deserialize)
)]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
)]
pub enum MessagePart {
    /// Text body, with a flag.
    TextBody(bool),
    /// HTML body, with a flag.
    HtmlBody(bool),
    /// Contents of the part.
    Contents,
    /// Raw form.
    Raw,
}
196
/// Selects which part or interpretation of a header a [`HeaderVariable`]
/// refers to.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(
    any(test, feature = "serde"),
    derive(serde::Serialize, serde::Deserialize)
)]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
)]
pub enum HeaderPart {
    Text,
    Date,
    Id,
    /// A part of an address header, selected by an `AddressPart`.
    Address(AddressPart),
    /// A part of a content-type header, selected by a [`ContentTypePart`].
    ContentType(ContentTypePart),
    /// A part of a `Received` header, selected by a [`ReceivedPart`].
    Received(ReceivedPart),
    Raw,
    RawName,
    Exists,
}
217
/// Selects a component of a content-type header: its type, its subtype, or a
/// named attribute.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(
    any(test, feature = "serde"),
    derive(serde::Serialize, serde::Deserialize)
)]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
)]
pub enum ContentTypePart {
    Type,
    Subtype,
    /// An attribute; the payload is presumably the attribute name (TODO
    /// confirm).
    Attribute(String),
}
232
/// Selects a component of a `Received` header.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(
    any(test, feature = "serde"),
    derive(serde::Serialize, serde::Deserialize)
)]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
)]
pub enum ReceivedPart {
    /// The "from" host, narrowed by a [`ReceivedHostname`] selector.
    From(ReceivedHostname),
    FromIp,
    FromIpRev,
    /// The "by" host, narrowed by a [`ReceivedHostname`] selector.
    By(ReceivedHostname),
    For,
    With,
    TlsVersion,
    TlsCipher,
    Id,
    Ident,
    Via,
    Date,
    DateRaw,
}
257
/// Selector used by [`ReceivedPart::From`] and [`ReceivedPart::By`] to choose
/// between a name, an IP, or any of them.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(
    any(test, feature = "serde"),
    derive(serde::Serialize, serde::Deserialize)
)]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
)]
pub enum ReceivedHostname {
    Name,
    Ip,
    Any,
}
272
/// Numeric value: either a signed 64-bit integer or a 64-bit float.
#[derive(Debug, Clone, Copy)]
#[cfg_attr(
    any(test, feature = "serde"),
    derive(serde::Serialize, serde::Deserialize)
)]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
)]
pub enum Number {
    /// Signed 64-bit integer.
    Integer(i64),
    /// 64-bit floating-point number.
    Float(f64),
}
286
287impl Number {
288 #[cfg(test)]
289 pub fn to_float(&self) -> f64 {
290 match self {
291 Number::Integer(i) => *i as f64,
292 Number::Float(fl) => *fl,
293 }
294 }
295}
296
297impl From<Number> for usize {
298 fn from(value: Number) -> Self {
299 match value {
300 Number::Integer(i) => i as usize,
301 Number::Float(fl) => fl as usize,
302 }
303 }
304}
305
306impl Display for Number {
307 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
308 match self {
309 Number::Integer(i) => i.fmt(f),
310 Number::Float(fl) => fl.fmt(f),
311 }
312 }
313}
314
315impl Compiler {
316 pub const VERSION: u32 = 2;
317
318 pub fn new() -> Self {
319 Compiler {
320 max_script_size: 1024 * 1024,
321 max_string_size: 4096,
322 max_variable_name_size: 32,
323 max_nested_blocks: 15,
324 max_nested_tests: 15,
325 max_nested_foreverypart: 3,
326 max_match_variables: 30,
327 max_local_variables: 128,
328 max_header_size: 1024,
329 max_includes: 6,
330 functions: AHashMap::new(),
331 no_capability_check: false,
332 }
333 }
334
335 pub fn set_max_header_size(&mut self, size: usize) {
336 self.max_header_size = size;
337 }
338
339 pub fn with_max_header_size(mut self, size: usize) -> Self {
340 self.max_header_size = size;
341 self
342 }
343
344 pub fn set_max_includes(&mut self, size: usize) {
345 self.max_includes = size;
346 }
347
348 pub fn with_max_includes(mut self, size: usize) -> Self {
349 self.max_includes = size;
350 self
351 }
352
353 pub fn set_max_nested_blocks(&mut self, size: usize) {
354 self.max_nested_blocks = size;
355 }
356
357 pub fn with_max_nested_blocks(mut self, size: usize) -> Self {
358 self.max_nested_blocks = size;
359 self
360 }
361
362 pub fn set_max_nested_tests(&mut self, size: usize) {
363 self.max_nested_tests = size;
364 }
365
366 pub fn with_max_nested_tests(mut self, size: usize) -> Self {
367 self.max_nested_tests = size;
368 self
369 }
370
371 pub fn set_max_nested_foreverypart(&mut self, size: usize) {
372 self.max_nested_foreverypart = size;
373 }
374
375 pub fn with_max_nested_foreverypart(mut self, size: usize) -> Self {
376 self.max_nested_foreverypart = size;
377 self
378 }
379
380 pub fn set_max_script_size(&mut self, size: usize) {
381 self.max_script_size = size;
382 }
383
384 pub fn with_max_script_size(mut self, size: usize) -> Self {
385 self.max_script_size = size;
386 self
387 }
388
389 pub fn set_max_string_size(&mut self, size: usize) {
390 self.max_string_size = size;
391 }
392
393 pub fn with_max_string_size(mut self, size: usize) -> Self {
394 self.max_string_size = size;
395 self
396 }
397
398 pub fn set_max_variable_name_size(&mut self, size: usize) {
399 self.max_variable_name_size = size;
400 }
401
402 pub fn with_max_variable_name_size(mut self, size: usize) -> Self {
403 self.max_variable_name_size = size;
404 self
405 }
406
407 pub fn set_max_match_variables(&mut self, size: usize) {
408 self.max_match_variables = size;
409 }
410
411 pub fn with_max_match_variables(mut self, size: usize) -> Self {
412 self.max_match_variables = size;
413 self
414 }
415
416 pub fn set_max_local_variables(&mut self, size: usize) {
417 self.max_local_variables = size;
418 }
419
420 pub fn with_max_local_variables(mut self, size: usize) -> Self {
421 self.max_local_variables = size;
422 self
423 }
424
425 pub fn register_functions(mut self, fnc_map: &mut FunctionMap) -> Self {
426 self.functions = std::mem::take(&mut fnc_map.map);
427 self
428 }
429
430 pub fn with_no_capability_check(mut self, value: bool) -> Self {
431 self.no_capability_check = value;
432 self
433 }
434
435 pub fn set_no_capability_check(&mut self, value: bool) {
436 self.no_capability_check = value;
437 }
438}
439
440impl CompileError {
441 pub fn line_num(&self) -> usize {
442 self.line_num
443 }
444
445 pub fn line_pos(&self) -> usize {
446 self.line_pos
447 }
448
449 pub fn error_type(&self) -> &ErrorType {
450 &self.error_type
451 }
452}
453
454impl PartialEq for Regex {
455 fn eq(&self, other: &Self) -> bool {
456 self.expr == other.expr
457 }
458}
459
460impl Eq for Regex {}
461
462impl TokenInfo {
463 pub fn expected(self, expected: impl Into<Cow<'static, str>>) -> CompileError {
464 CompileError {
465 line_num: self.line_num,
466 line_pos: self.line_pos,
467 error_type: ErrorType::UnexpectedToken {
468 expected: expected.into(),
469 found: self.token.to_string(),
470 },
471 }
472 }
473
474 pub fn missing_tag(self, tag: impl Into<Cow<'static, str>>) -> CompileError {
475 CompileError {
476 line_num: self.line_num,
477 line_pos: self.line_pos,
478 error_type: ErrorType::MissingTag(tag.into()),
479 }
480 }
481
482 pub fn custom(self, error_type: ErrorType) -> CompileError {
483 CompileError {
484 line_num: self.line_num,
485 line_pos: self.line_pos,
486 error_type,
487 }
488 }
489}
490
491impl Default for LazyRegex {
492 fn default() -> Self {
493 Self(Arc::new(ArcSwap::new(Arc::new(None))))
494 }
495}
496
497impl Regex {
498 pub fn new(expr: String, regex: fancy_regex::Regex) -> Self {
499 Self {
500 expr,
501 regex: LazyRegex(Arc::new(ArcSwap::new(Arc::new(Some(regex))))),
502 }
503 }
504}
505
506impl Display for CompileError {
507 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
508 match &self.error_type() {
509 ErrorType::InvalidCharacter(value) => {
510 write!(f, "Invalid character {:?}", char::from(*value))
511 }
512 ErrorType::InvalidNumber(value) => write!(f, "Invalid number {value:?}"),
513 ErrorType::InvalidMatchVariable(value) => {
514 write!(f, "Match variable {value} out of range")
515 }
516 ErrorType::InvalidUnicodeSequence(value) => {
517 write!(f, "Invalid Unicode sequence {value:04x}")
518 }
519 ErrorType::InvalidNamespace(value) => write!(f, "Invalid namespace {value:?}"),
520 ErrorType::InvalidRegex(value) => write!(f, "Invalid regular expression {value:?}"),
521 ErrorType::InvalidExpression(value) => write!(f, "Invalid expression {value}"),
522 ErrorType::InvalidUtf8String => write!(f, "Invalid UTF-8 string"),
523 ErrorType::InvalidHeaderName => write!(f, "Invalid header name"),
524 ErrorType::InvalidArguments => write!(f, "Invalid Arguments"),
525 ErrorType::InvalidAddress => write!(f, "Invalid Address"),
526 ErrorType::InvalidURI => write!(f, "Invalid URI"),
527 ErrorType::InvalidEnvelope(value) => write!(f, "Invalid envelope {value:?}"),
528 ErrorType::UnterminatedString => write!(f, "Unterminated string"),
529 ErrorType::UnterminatedComment => write!(f, "Unterminated comment"),
530 ErrorType::UnterminatedMultiline => write!(f, "Unterminated multi-line string"),
531 ErrorType::UnterminatedBlock => write!(f, "Unterminated block"),
532 ErrorType::ScriptTooLong => write!(f, "Sieve script is too large"),
533 ErrorType::StringTooLong => write!(f, "String is too long"),
534 ErrorType::VariableTooLong => write!(f, "Variable name is too long"),
535 ErrorType::VariableIsLocal(value) => {
536 write!(f, "Variable {value:?} was already defined as local")
537 }
538 ErrorType::HeaderTooLong => write!(f, "Header value is too long"),
539 ErrorType::ExpectedConstantString => write!(f, "Expected a constant string"),
540 ErrorType::UnexpectedToken { expected, found } => {
541 write!(f, "Expected token {expected:?} but found {found:?}")
542 }
543 ErrorType::UnexpectedEOF => write!(f, "Unexpected end of file"),
544 ErrorType::TooManyNestedBlocks => write!(f, "Too many nested blocks"),
545 ErrorType::TooManyNestedTests => write!(f, "Too many nested tests"),
546 ErrorType::TooManyNestedForEveryParts => {
547 write!(f, "Too many nested foreverypart blocks")
548 }
549 ErrorType::TooManyIncludes => write!(f, "Too many includes"),
550 ErrorType::LabelAlreadyDefined(value) => write!(f, "Label {value:?} already defined"),
551 ErrorType::LabelUndefined(value) => write!(f, "Label {value:?} does not exist"),
552 ErrorType::BreakOutsideLoop => write!(f, "Break used outside of foreverypart loop"),
553 ErrorType::ContinueOutsideLoop => write!(f, "Continue used outside of while loop"),
554 ErrorType::UnsupportedComparator(value) => {
555 write!(f, "Comparator {value:?} is not supported")
556 }
557 ErrorType::DuplicatedParameter => write!(f, "Duplicated argument"),
558 ErrorType::UndeclaredCapability(value) => {
559 write!(f, "Undeclared capability '{value}'")
560 }
561 ErrorType::MissingTag(value) => write!(f, "Missing tag {value:?}"),
562 }?;
563
564 write!(
565 f,
566 " at line {}, column {}.",
567 self.line_num(),
568 self.line_pos()
569 )
570 }
571}
572
573impl Display for RuntimeError {
574 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
575 match self {
576 RuntimeError::TooManyIncludes => write!(f, ""),
577 RuntimeError::InvalidInstruction(value) => write!(
578 f,
579 "Script executed invalid instruction {:?} at line {}, column {}.",
580 value.name(),
581 value.line_pos(),
582 value.line_num()
583 ),
584 RuntimeError::ScriptErrorMessage(value) => {
585 write!(f, "Script reported error {value:?}.")
586 }
587 RuntimeError::CapabilityNotAllowed(value) => {
588 write!(f, "Capability '{value}' has been disabled.")
589 }
590 RuntimeError::CapabilityNotSupported(value) => {
591 write!(f, "Capability '{value}' not supported.")
592 }
593 RuntimeError::CPULimitReached => write!(
594 f,
595 "Script exceeded the maximum number of instructions allowed to execute."
596 ),
597 }
598 }
599}
600
#[cfg(test)]
mod tests {
    use std::{fs, path::PathBuf};

    use crate::Compiler;

    /// Compiles every `*.sieve` file under `tests/rfcs` and compares the
    /// pretty-printed JSON of its enumerated instructions with the sibling
    /// `*.json` file. On mismatch the actual output is written next to it with
    /// a `.failed` extension and the test panics.
    #[test]
    fn parse_rfc() {
        let test_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
            .join("tests")
            .join("rfcs");
        let compiler = Compiler::new().with_max_nested_foreverypart(10);
        let mut tests_run = 0;

        for entry in fs::read_dir(&test_dir).unwrap() {
            let mut path = entry.unwrap().path();
            // Only Sieve scripts are test inputs; skip everything else.
            if !path.extension().is_some_and(|ext| ext == "sieve") {
                continue;
            }
            println!("Parsing {}", path.display());

            let script = fs::read(&path).unwrap();
            path.set_extension("json");
            let expected_result = fs::read(&path).unwrap();

            tests_run += 1;

            let sieve = compiler.compile(&script).unwrap();
            let indexed = sieve
                .instructions
                .into_iter()
                .enumerate()
                .collect::<Vec<_>>();
            let json_sieve = serde_json::to_string_pretty(&indexed).unwrap();

            if json_sieve.as_bytes() != expected_result {
                // Keep the actual output around so it can be diffed by hand.
                path.set_extension("failed");
                fs::write(&path, json_sieve.as_bytes()).unwrap();
                panic!("Test failed, parsed sieve saved to {}", path.display());
            }
        }

        assert!(
            tests_run > 0,
            "Did not find any tests to run in folder {}.",
            test_dir.display()
        );
    }
}