sieve/compiler/lexer/
string.rs

1/*
2 * Copyright (c) 2020-2023, Stalwart Labs Ltd.
3 *
4 * This file is part of the Stalwart Sieve Interpreter.
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU Affero General Public License as
8 * published by the Free Software Foundation, either version 3 of
9 * the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Affero General Public License for more details.
15 * in the LICENSE file at the top-level directory of this distribution.
16 * You should have received a copy of the GNU Affero General Public License
17 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
18 *
19 * You can be released from the requirements of the AGPLv3 license by
20 * purchasing a commercial license. Please contact licensing@stalw.art
21 * for more details.
22*/
23
24use std::fmt::Display;
25
26use mail_parser::HeaderName;
27
28use crate::{
29    compiler::{
30        grammar::{
31            expr::{self},
32            instruction::CompilerState,
33            AddressPart,
34        },
35        ContentTypePart, ErrorType, HeaderPart, HeaderVariable, MessagePart, Number,
36        ReceivedHostname, ReceivedPart, Value, VariableType,
37    },
38    runtime::eval::IntoString,
39    Envelope, MAX_MATCH_VARIABLES,
40};
41
/// Scanner state used by `CompilerState::tokenize_string` while walking the
/// bytes of a string literal.
enum State {
    /// Scanning plain text, outside of any `${...}` construct.
    None,
    /// Inside a `${...}` reference, after the opening `${` and before `}`.
    Variable,
    /// Inside an encoded-character sequence, entered after `${hex:` or
    /// `${unicode:` has been recognised.
    Encoded {
        /// `true` for `${unicode:...}`, `false` for `${hex:...}`.
        is_unicode: bool,
        /// Length of the decode buffer when the sequence started; used to
        /// discard partially decoded output if the sequence is malformed.
        initial_buf_size: usize,
    },
}
50
51impl<'x> CompilerState<'x> {
    /// Splits the raw bytes of a string literal into literal text, numbers and
    /// variable references.
    ///
    /// The scanner recognises `${name}` references and, when `parse_decoded`
    /// is `true`, the `${hex:...}` and `${unicode:...}` encoded-character
    /// sequences. Decoded bytes are accumulated in a buffer which is then
    /// handed to `add_value`; with `parse_decoded` set, `add_value` tokenizes
    /// that buffer a second time (with decoding disabled) so that variables
    /// produced by decoding are also picked up.
    ///
    /// Malformed or unterminated `${...}` constructs are not an error: their
    /// source bytes are copied verbatim into the output text.
    ///
    /// # Returns
    ///
    /// * the single item when exactly one item was produced,
    /// * an empty `Value::Text` when nothing was produced,
    /// * a `Value::List` of all items otherwise.
    ///
    /// # Errors
    ///
    /// Propagates any error from `parse_variable` / `parse_match_variable`
    /// other than `InvalidNamespace` and `InvalidEnvelope` (those two cause the
    /// reference to be kept as literal text), any error from `add_value`, and
    /// `ErrorType::InvalidUnicodeSequence` when a `${unicode:...}` value is not
    /// a valid Unicode scalar value.
    pub(crate) fn tokenize_string(
        &mut self,
        bytes: &[u8],
        parse_decoded: bool,
    ) -> Result<Value, ErrorType> {
        let mut state = State::None;
        let mut items = Vec::with_capacity(3);
        // Previous byte, used to detect the two-byte `${` opener.
        let mut last_ch = 0;

        // Index of the first byte after `${`; `var_start_pos - 2` is the `$`.
        let mut var_start_pos = usize::MAX;
        // Stays `true` while the reference name consists only of digits.
        let mut var_is_number = true;
        // Set once a `.` is seen inside the reference name.
        let mut var_has_namespace = false;

        // `text_has_digits` stays `true` while the pending text contains only
        // digits and at most one dot; `text_has_dots` records that dot.
        let mut text_has_digits = true;
        let mut text_has_dots = false;

        // Start of the hex number currently being read in `State::Encoded`,
        // or `usize::MAX` when no number is in progress.
        let mut hex_start = usize::MAX;
        let mut decode_buf = Vec::with_capacity(bytes.len());

        for (pos, &ch) in bytes.iter().enumerate() {
            // Set when the current `${...}` construct turns out to be invalid;
            // handled once, after the state match below.
            let mut is_var_error = false;

            match state {
                State::None => match ch {
                    b'{' if last_ch == b'$' => {
                        // Drop the `$` that was already buffered as text.
                        // Note that buffering it went through the catch-all arm
                        // below, so `text_has_digits` is already `false` here.
                        decode_buf.pop();
                        var_start_pos = pos + 1;
                        var_is_number = true;
                        var_has_namespace = false;
                        state = State::Variable;
                    }
                    b'.' => {
                        // A second dot means the text can no longer be a number.
                        if text_has_dots {
                            text_has_digits = false;
                        } else {
                            text_has_dots = true;
                        }
                        decode_buf.push(ch);
                    }
                    b'0'..=b'9' => {
                        decode_buf.push(ch);
                    }
                    _ => {
                        text_has_digits = false;
                        decode_buf.push(ch);
                    }
                },
                State::Variable => match ch {
                    b'a'..=b'z' | b'A'..=b'Z' | b'_' | b'[' | b']' | b'*' | b'-' => {
                        var_is_number = false;
                    }
                    b'.' => {
                        var_is_number = false;
                        var_has_namespace = true;
                    }
                    b'0'..=b'9' => {}
                    b'}' => {
                        // `${}` (empty name) is treated as invalid.
                        if pos > var_start_pos {
                            // Add any text before the variable
                            if !decode_buf.is_empty() {
                                self.add_value(
                                    &mut items,
                                    &decode_buf,
                                    parse_decoded,
                                    text_has_digits,
                                    text_has_dots,
                                )?;
                                decode_buf.clear();
                                text_has_digits = true;
                                text_has_dots = false;
                            }

                            // Parse variable type
                            // Only the ASCII bytes matched by the arms of this
                            // state can appear in the name, so the slice is
                            // valid UTF-8.
                            let var_name = std::str::from_utf8(&bytes[var_start_pos..pos]).unwrap();
                            let var_type = if !var_is_number {
                                self.parse_variable(var_name, var_has_namespace)
                            } else {
                                self.parse_match_variable(var_name)
                            };

                            match var_type {
                                Ok(Some(var)) => items.push(Value::Variable(var)),
                                // `Ok(None)`: the reference produces no item.
                                Ok(None) => {}
                                // Unknown namespace / envelope part: keep the
                                // reference as literal text instead of failing.
                                Err(
                                    ErrorType::InvalidNamespace(_) | ErrorType::InvalidEnvelope(_),
                                ) => {
                                    is_var_error = true;
                                }
                                Err(e) => return Err(e),
                            }

                            state = State::None;
                        } else {
                            is_var_error = true;
                        }
                    }
                    b':' => {
                        // `hex:` / `unicode:` prefixes (case-insensitive) switch
                        // to encoded-character mode when decoding is enabled.
                        if parse_decoded && !var_has_namespace {
                            match bytes.get(var_start_pos..pos) {
                                Some(enc) if enc.eq_ignore_ascii_case(b"hex") => {
                                    state = State::Encoded {
                                        is_unicode: false,
                                        initial_buf_size: decode_buf.len(),
                                    };
                                }
                                Some(enc) if enc.eq_ignore_ascii_case(b"unicode") => {
                                    state = State::Encoded {
                                        is_unicode: true,
                                        initial_buf_size: decode_buf.len(),
                                    };
                                }
                                _ => {
                                    is_var_error = true;
                                }
                            }
                        } else if var_has_namespace {
                            // A colon is accepted inside namespaced names.
                            var_is_number = false;
                        } else {
                            is_var_error = true;
                        }
                    }
                    _ => {
                        is_var_error = true;
                    }
                },

                State::Encoded {
                    is_unicode,
                    initial_buf_size,
                } => match ch {
                    b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F' => {
                        if hex_start == usize::MAX {
                            hex_start = pos;
                        }
                    }
                    // Whitespace separates hex numbers; `}` ends the sequence.
                    b' ' | b'\t' | b'\r' | b'\n' | b'}' => {
                        if hex_start != usize::MAX {
                            // Only ASCII hex digits were accepted since
                            // `hex_start`, so the slice is valid UTF-8.
                            let code = std::str::from_utf8(&bytes[hex_start..pos]).unwrap();
                            hex_start = usize::MAX;

                            if !is_unicode {
                                // `hex:` values must fit in a single byte.
                                if let Ok(ch) = u8::from_str_radix(code, 16) {
                                    decode_buf.push(ch);
                                } else {
                                    is_var_error = true;
                                }
                            } else if let Ok(ch) = u32::from_str_radix(code, 16) {
                                let mut buf = [0; 4];
                                decode_buf.extend_from_slice(
                                    char::from_u32(ch)
                                        .ok_or(ErrorType::InvalidUnicodeSequence(ch))?
                                        .encode_utf8(&mut buf)
                                        .as_bytes(),
                                );
                            } else {
                                is_var_error = true;
                            }
                        }
                        if ch == b'}' {
                            // A sequence that decoded nothing is invalid.
                            if decode_buf.len() != initial_buf_size {
                                state = State::None;
                            } else {
                                is_var_error = true;
                            }
                        }
                    }
                    _ => {
                        is_var_error = true;
                    }
                },
            }

            if is_var_error {
                // Discard anything decoded so far for this sequence...
                if let State::Encoded {
                    initial_buf_size, ..
                } = state
                {
                    if initial_buf_size != decode_buf.len() {
                        decode_buf.truncate(initial_buf_size);
                    }
                }
                // ...and copy the raw source, from the `$` up to and including
                // the current byte, into the output text.
                decode_buf.extend_from_slice(&bytes[var_start_pos - 2..pos + 1]);
                hex_start = usize::MAX;
                state = State::None;
            }

            last_ch = ch;
        }

        // Input ended inside an unterminated `${...}`: emit its raw bytes.
        match state {
            State::Variable => {
                decode_buf.extend_from_slice(&bytes[var_start_pos - 2..bytes.len()]);
            }
            State::Encoded {
                initial_buf_size, ..
            } => {
                if initial_buf_size != decode_buf.len() {
                    decode_buf.truncate(initial_buf_size);
                }
                decode_buf.extend_from_slice(&bytes[var_start_pos - 2..bytes.len()]);
            }
            State::None => (),
        }

        // Flush any trailing text.
        if !decode_buf.is_empty() {
            self.add_value(
                &mut items,
                &decode_buf,
                parse_decoded,
                text_has_digits,
                text_has_dots,
            )?;
        }

        Ok(match items.len() {
            1 => items.pop().unwrap(),
            0 => Value::Text(String::new().into()),
            _ => Value::List(items),
        })
    }
272
273    fn parse_match_variable(&mut self, var_name: &str) -> Result<Option<VariableType>, ErrorType> {
274        let num = var_name
275            .parse()
276            .map_err(|_| ErrorType::InvalidNumber(var_name.to_string()))?;
277        if num < MAX_MATCH_VARIABLES {
278            if self.register_match_var(num) {
279                let total_vars = num + 1;
280                if total_vars > self.vars_match_max {
281                    self.vars_match_max = total_vars;
282                }
283                Ok(Some(VariableType::Match(num)))
284            } else {
285                Ok(None)
286            }
287        } else {
288            Err(ErrorType::InvalidMatchVariable(num))
289        }
290    }
291
292    pub fn parse_variable(
293        &self,
294        var_name: &str,
295        maybe_namespace: bool,
296    ) -> Result<Option<VariableType>, ErrorType> {
297        if !maybe_namespace {
298            if self.is_var_global(var_name) {
299                Ok(Some(VariableType::Global(var_name.to_string())))
300            } else if let Some(var_id) = self.get_local_var(var_name) {
301                Ok(Some(VariableType::Local(var_id)))
302            } else {
303                Ok(None)
304            }
305        } else {
306            let var = match var_name.to_lowercase().split_once('.') {
307                Some(("global" | "t", var_name)) if !var_name.is_empty() => {
308                    VariableType::Global(var_name.to_string())
309                }
310                Some(("env", var_name)) if !var_name.is_empty() => {
311                    VariableType::Environment(var_name.to_string())
312                }
313                Some(("envelope", var_name)) if !var_name.is_empty() => {
314                    let envelope = match var_name {
315                        "from" => Envelope::From,
316                        "to" => Envelope::To,
317                        "by_time_absolute" => Envelope::ByTimeAbsolute,
318                        "by_time_relative" => Envelope::ByTimeRelative,
319                        "by_mode" => Envelope::ByMode,
320                        "by_trace" => Envelope::ByTrace,
321                        "notify" => Envelope::Notify,
322                        "orcpt" => Envelope::Orcpt,
323                        "ret" => Envelope::Ret,
324                        "envid" => Envelope::Envid,
325                        _ => {
326                            return Err(ErrorType::InvalidEnvelope(var_name.to_string()));
327                        }
328                    };
329                    VariableType::Envelope(envelope)
330                }
331                Some(("header", var_name)) if !var_name.is_empty() => {
332                    self.parse_header_variable(var_name)?
333                }
334                Some(("body", var_name)) if !var_name.is_empty() => match var_name {
335                    "text" => VariableType::Part(MessagePart::TextBody(false)),
336                    "html" => VariableType::Part(MessagePart::HtmlBody(false)),
337                    "to_text" => VariableType::Part(MessagePart::TextBody(true)),
338                    "to_html" => VariableType::Part(MessagePart::HtmlBody(true)),
339                    _ => return Err(ErrorType::InvalidNamespace(var_name.to_string())),
340                },
341                Some(("part", var_name)) if !var_name.is_empty() => match var_name {
342                    "text" => VariableType::Part(MessagePart::Contents),
343                    "raw" => VariableType::Part(MessagePart::Raw),
344                    _ => return Err(ErrorType::InvalidNamespace(var_name.to_string())),
345                },
346                None => {
347                    if self.is_var_global(var_name) {
348                        VariableType::Global(var_name.to_string())
349                    } else if let Some(var_id) = self.get_local_var(var_name) {
350                        VariableType::Local(var_id)
351                    } else {
352                        return Ok(None);
353                    }
354                }
355                _ => return Err(ErrorType::InvalidNamespace(var_name.to_string())),
356            };
357
358            Ok(Some(var))
359        }
360    }
361
    /// Parses the member part of a `header.` variable reference (the text
    /// that `parse_variable` passes after stripping the `header.` prefix)
    /// into a `VariableType::Header`.
    ///
    /// The accepted shape, as implemented by the state machine below, is
    /// roughly `name[:name...][hdr_index].part[part_index]`, where:
    ///
    /// * several header names may be separated by `:`,
    /// * a leading `*` stands for "any header" instead of a name,
    /// * whitespace is ignored everywhere,
    /// * an index is either `*` or a number.
    ///
    /// Index encoding in the result: a missing index is `-1` (or `0` when the
    /// `*` name wildcard was used), `*` is `0`, and a literal `0` is stored
    /// as `1`.
    ///
    /// # Errors
    ///
    /// Returns `ErrorType::InvalidExpression` (carrying `var_name`) when a
    /// header name is rejected by `HeaderName::parse`, the part name is not
    /// accepted by `HeaderPart::try_from`, an index is not numeric, an
    /// unexpected character follows the name section, or neither a name nor
    /// the wildcard was given.
    fn parse_header_variable(&self, var_name: &str) -> Result<VariableType, ErrorType> {
        #[derive(Debug)]
        enum State {
            // Reading header names; also the state returned to after `]`.
            Name,
            // Inside the `[...]` that follows the header name.
            Index,
            // Reading the part name after `.`.
            Part,
            // Inside the `[...]` that follows the part name.
            PartIndex,
        }
        let mut name = vec![];
        // Set once the name section is finished (a `[` or `.` was seen).
        let mut has_name = false;
        let mut has_wildcard = false;
        let mut hdr_name = String::new();
        let mut hdr_index = String::new();
        let mut part = String::new();
        let mut part_index = String::new();
        let mut state = State::Name;

        for ch in var_name.chars() {
            match state {
                State::Name => match ch {
                    '[' => {
                        // The first bracket is the header index; a later one
                        // seen from this state is taken as the part index,
                        // but only while no part name has been read.
                        state = if hdr_index.is_empty() {
                            State::Index
                        } else if part.is_empty() {
                            State::PartIndex
                        } else {
                            return Err(ErrorType::InvalidExpression(var_name.to_string()));
                        };
                        has_name = true;
                    }
                    '.' => {
                        state = State::Part;
                        has_name = true;
                    }
                    ' ' | '\t' | '\r' | '\n' => {}
                    // `*` is only valid as the very first name character.
                    '*' if !has_wildcard && hdr_name.is_empty() && name.is_empty() => {
                        has_wildcard = true;
                    }
                    // `:` terminates one header name and starts the next.
                    ':' if !hdr_name.is_empty() && !has_wildcard => {
                        name.push(
                            HeaderName::parse(std::mem::take(&mut hdr_name)).ok_or_else(|| {
                                ErrorType::InvalidExpression(var_name.to_string())
                            })?,
                        );
                    }
                    _ if !has_name && !has_wildcard => {
                        hdr_name.push(ch);
                    }
                    // Anything else after the name section or the wildcard.
                    _ => {
                        return Err(ErrorType::InvalidExpression(var_name.to_string()));
                    }
                },
                State::Index => match ch {
                    ']' => {
                        state = State::Name;
                    }
                    ' ' | '\t' | '\r' | '\n' => {}
                    _ => {
                        hdr_index.push(ch);
                    }
                },
                State::Part => match ch {
                    '[' => {
                        state = State::PartIndex;
                    }
                    ' ' | '\t' | '\r' | '\n' => {}
                    // Dots are kept, so parts such as `rcvd.from.ip` reach
                    // `HeaderPart::try_from` intact.
                    _ => {
                        part.push(ch);
                    }
                },
                State::PartIndex => match ch {
                    ']' => {
                        state = State::Name;
                    }
                    ' ' | '\t' | '\r' | '\n' => {}
                    _ => {
                        part_index.push(ch);
                    }
                },
            }
        }

        // Flush the last (or only) header name.
        if !hdr_name.is_empty() {
            name.push(
                HeaderName::parse(hdr_name)
                    .ok_or_else(|| ErrorType::InvalidExpression(var_name.to_string()))?,
            );
        }

        if !name.is_empty() || has_wildcard {
            Ok(VariableType::Header(HeaderVariable {
                name,
                part: HeaderPart::try_from(part.as_str())
                    .map_err(|_| ErrorType::InvalidExpression(var_name.to_string()))?,
                index_hdr: match hdr_index.as_str() {
                    // No index given.
                    "" => {
                        if !has_wildcard {
                            -1
                        } else {
                            0
                        }
                    }
                    "*" => 0,
                    // Numeric index; a literal 0 is stored as 1.
                    _ => hdr_index
                        .parse()
                        .map(|v| if v == 0 { 1 } else { v })
                        .map_err(|_| ErrorType::InvalidExpression(var_name.to_string()))?,
                },
                index_part: match part_index.as_str() {
                    // No index given.
                    "" => {
                        if !has_wildcard {
                            -1
                        } else {
                            0
                        }
                    }
                    "*" => 0,
                    // Numeric index; a literal 0 is stored as 1.
                    _ => part_index
                        .parse()
                        .map(|v| if v == 0 { 1 } else { v })
                        .map_err(|_| ErrorType::InvalidExpression(var_name.to_string()))?,
                },
            }))
        } else {
            Err(ErrorType::InvalidExpression(var_name.to_string()))
        }
    }
489
490    pub fn parse_expr_fnc_or_var(
491        &self,
492        var_name: &str,
493        maybe_namespace: bool,
494    ) -> Result<expr::Token, String> {
495        match self.parse_variable(var_name, maybe_namespace) {
496            Ok(Some(var)) => Ok(expr::Token::Variable(var)),
497            _ => {
498                if let Some((id, num_args)) = self.compiler.functions.get(var_name) {
499                    Ok(expr::Token::Function {
500                        name: var_name.to_string(),
501                        id: *id,
502                        num_args: *num_args,
503                    })
504                } else {
505                    Err(format!("Invalid variable or function name {var_name:?}"))
506                }
507            }
508        }
509    }
510
511    #[inline(always)]
512    fn add_value(
513        &mut self,
514        items: &mut Vec<Value>,
515        buf: &[u8],
516        parse_decoded: bool,
517        has_digits: bool,
518        has_dots: bool,
519    ) -> Result<(), ErrorType> {
520        if !parse_decoded {
521            items.push(if has_digits {
522                if has_dots {
523                    match std::str::from_utf8(buf)
524                        .ok()
525                        .and_then(|v| (v, v.parse::<f64>().ok()?).into())
526                    {
527                        Some((v, n)) if n.to_string() == v => Value::Number(Number::Float(n)),
528                        _ => Value::Text(buf.to_vec().into_string().into()),
529                    }
530                } else {
531                    match std::str::from_utf8(buf)
532                        .ok()
533                        .and_then(|v| (v, v.parse::<i64>().ok()?).into())
534                    {
535                        Some((v, n)) if n.to_string() == v => Value::Number(Number::Integer(n)),
536                        _ => Value::Text(buf.to_vec().into_string().into()),
537                    }
538                }
539            } else {
540                Value::Text(buf.to_vec().into_string().into())
541            });
542        } else {
543            match self.tokenize_string(buf, false)? {
544                Value::List(new_items) => items.extend(new_items),
545                item => items.push(item),
546            }
547        }
548
549        Ok(())
550    }
551}
552
553impl TryFrom<&str> for HeaderPart {
554    type Error = ();
555
556    fn try_from(value: &str) -> Result<Self, Self::Error> {
557        let (value, subvalue) = value.split_once('.').unwrap_or((value, ""));
558        Ok(match value {
559            "" | "text" => HeaderPart::Text,
560            // Addresses
561            "name" => HeaderPart::Address(AddressPart::Name),
562            "addr" => {
563                if !subvalue.is_empty() {
564                    HeaderPart::Address(AddressPart::try_from(subvalue)?)
565                } else {
566                    HeaderPart::Address(AddressPart::All)
567                }
568            }
569
570            // Content-type
571            "type" => HeaderPart::ContentType(ContentTypePart::Type),
572            "subtype" => HeaderPart::ContentType(ContentTypePart::Subtype),
573            "attr" if !subvalue.is_empty() => {
574                HeaderPart::ContentType(ContentTypePart::Attribute(subvalue.to_string()))
575            }
576
577            // Received
578            "rcvd" => {
579                if !subvalue.is_empty() {
580                    HeaderPart::Received(ReceivedPart::try_from(subvalue)?)
581                } else {
582                    HeaderPart::Text
583                }
584            }
585
586            // Id
587            "id" => HeaderPart::Id,
588
589            // Raw
590            "raw" => HeaderPart::Raw,
591            "raw_name" => HeaderPart::RawName,
592
593            // Date
594            "date" => HeaderPart::Date,
595
596            // Exists
597            "exists" => HeaderPart::Exists,
598
599            _ => {
600                return Err(());
601            }
602        })
603    }
604}
605
606impl TryFrom<&str> for ReceivedPart {
607    type Error = ();
608
609    fn try_from(value: &str) -> Result<Self, Self::Error> {
610        Ok(match value {
611            // Received
612            "from" => ReceivedPart::From(ReceivedHostname::Any),
613            "from.name" => ReceivedPart::From(ReceivedHostname::Name),
614            "from.ip" => ReceivedPart::From(ReceivedHostname::Ip),
615            "ip" => ReceivedPart::FromIp,
616            "iprev" => ReceivedPart::FromIpRev,
617            "by" => ReceivedPart::By(ReceivedHostname::Any),
618            "by.name" => ReceivedPart::By(ReceivedHostname::Name),
619            "by.ip" => ReceivedPart::By(ReceivedHostname::Ip),
620            "for" => ReceivedPart::For,
621            "with" => ReceivedPart::With,
622            "tls" => ReceivedPart::TlsVersion,
623            "cipher" => ReceivedPart::TlsCipher,
624            "id" => ReceivedPart::Id,
625            "ident" => ReceivedPart::Ident,
626            "date" => ReceivedPart::Date,
627            "date.raw" => ReceivedPart::DateRaw,
628            _ => return Err(()),
629        })
630    }
631}
632
633impl TryFrom<&str> for AddressPart {
634    type Error = ();
635
636    fn try_from(value: &str) -> Result<Self, Self::Error> {
637        Ok(match value {
638            "name" => AddressPart::Name,
639            "addr" | "all" => AddressPart::All,
640            "addr.domain" => AddressPart::Domain,
641            "addr.local" => AddressPart::LocalPart,
642            "addr.user" => AddressPart::User,
643            "addr.detail" => AddressPart::Detail,
644            _ => return Err(()),
645        })
646    }
647}
648
649impl Display for Value {
650    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
651        match self {
652            Value::Text(t) => f.write_str(t),
653            Value::List(l) => {
654                for i in l {
655                    i.fmt(f)?;
656                }
657                Ok(())
658            }
659            Value::Number(n) => n.fmt(f),
660            Value::Variable(v) => v.fmt(f),
661            Value::Regex(r) => f.write_str(&r.expr),
662        }
663    }
664}
665
666impl Display for VariableType {
667    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
668        match self {
669            VariableType::Local(v) => write!(f, "${{{v}}}"),
670            VariableType::Match(v) => write!(f, "${{{v}}}"),
671            VariableType::Global(v) => write!(f, "${{global.{v}}}"),
672            VariableType::Environment(v) => write!(f, "${{env.{v}}}"),
673
674            VariableType::Envelope(env) => f.write_str(match env {
675                Envelope::From => "${{envelope.from}}",
676                Envelope::To => "${{envelope.to}}",
677                Envelope::ByTimeAbsolute => "${{envelope.by_time_absolute}}",
678                Envelope::ByTimeRelative => "${{envelope.by_time_relative}}",
679                Envelope::ByMode => "${{envelope.by_mode}}",
680                Envelope::ByTrace => "${{envelope.by_trace}}",
681                Envelope::Notify => "${{envelope.notify}}",
682                Envelope::Orcpt => "${{envelope.orcpt}}",
683                Envelope::Ret => "${{envelope.ret}}",
684                Envelope::Envid => "${{envelope.envit}}",
685            }),
686
687            VariableType::Header(hdr) => {
688                write!(
689                    f,
690                    "${{header.{}",
691                    hdr.name.first().map(|h| h.as_str()).unwrap_or_default()
692                )?;
693                if hdr.index_hdr != 0 {
694                    write!(f, "[{}]", hdr.index_hdr)?;
695                } else {
696                    f.write_str("[*]")?;
697                }
698                /*if hdr.part != HeaderPart::Text {
699                    f.write_str(".")?;
700                    f.write_str(match &hdr.part {
701                        HeaderPart::Name => "name",
702                        HeaderPart::Address => "address",
703                        HeaderPart::Type => "type",
704                        HeaderPart::Subtype => "subtype",
705                        HeaderPart::Raw => "raw",
706                        HeaderPart::Date => "date",
707                        HeaderPart::Attribute(attr) => attr.as_str(),
708                        HeaderPart::Text => unreachable!(),
709                    })?;
710                }*/
711                if hdr.index_part != 0 {
712                    write!(f, "[{}]", hdr.index_part)?;
713                } else {
714                    f.write_str("[*]")?;
715                }
716                f.write_str("}")
717            }
718            VariableType::Part(part) => {
719                write!(
720                    f,
721                    "${{{}",
722                    match part {
723                        MessagePart::TextBody(true) => "body.to_text",
724                        MessagePart::TextBody(false) => "body.text",
725                        MessagePart::HtmlBody(true) => "body.to_html",
726                        MessagePart::HtmlBody(false) => "body.html",
727                        MessagePart::Contents => "part.text",
728                        MessagePart::Raw => "part.raw",
729                    }
730                )?;
731                f.write_str("}")
732            }
733        }
734    }
735}
736
737#[cfg(test)]
738mod tests {
739
740    use mail_parser::HeaderName;
741
742    use super::Value;
743    use crate::compiler::grammar::instruction::{Block, CompilerState, Instruction, MAX_PARAMS};
744    use crate::compiler::grammar::test::Test;
745    use crate::compiler::grammar::tests::test_string::TestString;
746    use crate::compiler::grammar::{Comparator, MatchType};
747    use crate::compiler::lexer::tokenizer::Tokenizer;
748    use crate::compiler::lexer::word::Word;
749    use crate::compiler::{AddressPart, HeaderPart, HeaderVariable, VariableType};
750    use crate::{AHashSet, Compiler};
751
752    #[test]
753    fn tokenize_string() {
754        let c = Compiler::new();
755        let mut block = Block::new(Word::Not);
756        block.match_test_pos.push(0);
757        let mut compiler = CompilerState {
758            compiler: &c,
759            instructions: vec![Instruction::Test(Test::String(TestString {
760                match_type: MatchType::Regex(u64::MAX),
761                comparator: Comparator::AsciiCaseMap,
762                source: vec![Value::Variable(VariableType::Local(0))],
763                key_list: vec![Value::Variable(VariableType::Local(0))],
764                is_not: false,
765            }))],
766            block_stack: Vec::new(),
767            block,
768            last_block_type: Word::Not,
769            vars_global: AHashSet::new(),
770            vars_num: 0,
771            vars_num_max: 0,
772            vars_local: 0,
773            tokens: Tokenizer::new(&c, b""),
774            vars_match_max: usize::MAX,
775            param_check: [false; MAX_PARAMS],
776            includes_num: 0,
777        };
778
779        for (input, expected_result) in [
780            ("$${hex:24 24}", Value::Text("$$$".to_string().into())),
781            ("$${hex:40}", Value::Text("$@".to_string().into())),
782            ("${hex: 40 }", Value::Text("@".to_string().into())),
783            ("${HEX: 40}", Value::Text("@".to_string().into())),
784            ("${hex:40", Value::Text("${hex:40".to_string().into())),
785            ("${hex:400}", Value::Text("${hex:400}".to_string().into())),
786            (
787                "${hex:4${hex:30}}",
788                Value::Text("${hex:40}".to_string().into()),
789            ),
790            ("${unicode:40}", Value::Text("@".to_string().into())),
791            (
792                "${ unicode:40}",
793                Value::Text("${ unicode:40}".to_string().into()),
794            ),
795            ("${UNICODE:40}", Value::Text("@".to_string().into())),
796            ("${UnICoDE:0000040}", Value::Text("@".to_string().into())),
797            ("${Unicode:40}", Value::Text("@".to_string().into())),
798            (
799                "${Unicode:40 40 ",
800                Value::Text("${Unicode:40 40 ".to_string().into()),
801            ),
802            (
803                "${Unicode:Cool}",
804                Value::Text("${Unicode:Cool}".to_string().into()),
805            ),
806            ("", Value::Text("".to_string().into())),
807            (
808                "${global.full}",
809                Value::Variable(VariableType::Global("full".to_string())),
810            ),
811            (
812                "${BAD${global.Company}",
813                Value::List(vec![
814                    Value::Text("${BAD".to_string().into()),
815                    Value::Variable(VariableType::Global("company".to_string())),
816                ]),
817            ),
818            (
819                "${President, ${global.Company} Inc.}",
820                Value::List(vec![
821                    Value::Text("${President, ".to_string().into()),
822                    Value::Variable(VariableType::Global("company".to_string())),
823                    Value::Text(" Inc.}".to_string().into()),
824                ]),
825            ),
826            (
827                "dear${hex:20 24 7b}global.Name}",
828                Value::List(vec![
829                    Value::Text("dear ".to_string().into()),
830                    Value::Variable(VariableType::Global("name".to_string())),
831                ]),
832            ),
833            (
834                "INBOX.lists.${2}",
835                Value::List(vec![
836                    Value::Text("INBOX.lists.".to_string().into()),
837                    Value::Variable(VariableType::Match(2)),
838                ]),
839            ),
840            (
841                "Ein unerh${unicode:00F6}rt gro${unicode:00DF}er Test",
842                Value::Text("Ein unerhört großer Test".to_string().into()),
843            ),
844            ("&%${}!", Value::Text("&%${}!".to_string().into())),
845            ("${doh!}", Value::Text("${doh!}".to_string().into())),
846            (
847                "${hex: 20 }${global.hi}${hex: 20 }",
848                Value::List(vec![
849                    Value::Text(" ".to_string().into()),
850                    Value::Variable(VariableType::Global("hi".to_string())),
851                    Value::Text(" ".to_string().into()),
852                ]),
853            ),
854            (
855                "${hex:20 24 7b z}${global.hi}${unicode:}${unicode: }${hex:20}",
856                Value::List(vec![
857                    Value::Text("${hex:20 24 7b z}".to_string().into()),
858                    Value::Variable(VariableType::Global("hi".to_string())),
859                    Value::Text("${unicode:}${unicode: } ".to_string().into()),
860                ]),
861            ),
862            (
863                "${header.from}",
864                Value::Variable(VariableType::Header(HeaderVariable {
865                    name: vec![HeaderName::From],
866                    part: HeaderPart::Text,
867                    index_hdr: -1,
868                    index_part: -1,
869                })),
870            ),
871            (
872                "${header.from.addr}",
873                Value::Variable(VariableType::Header(HeaderVariable {
874                    name: vec![HeaderName::From],
875                    part: HeaderPart::Address(AddressPart::All),
876                    index_hdr: -1,
877                    index_part: -1,
878                })),
879            ),
880            (
881                "${header.from[1]}",
882                Value::Variable(VariableType::Header(HeaderVariable {
883                    name: vec![HeaderName::From],
884                    part: HeaderPart::Text,
885                    index_hdr: 1,
886                    index_part: -1,
887                })),
888            ),
889            (
890                "${header.from[*]}",
891                Value::Variable(VariableType::Header(HeaderVariable {
892                    name: vec![HeaderName::From],
893                    part: HeaderPart::Text,
894                    index_hdr: 0,
895                    index_part: -1,
896                })),
897            ),
898            (
899                "${header.from[20].name}",
900                Value::Variable(VariableType::Header(HeaderVariable {
901                    name: vec![HeaderName::From],
902                    part: HeaderPart::Address(AddressPart::Name),
903                    index_hdr: 20,
904                    index_part: -1,
905                })),
906            ),
907            (
908                "${header.from[*].addr}",
909                Value::Variable(VariableType::Header(HeaderVariable {
910                    name: vec![HeaderName::From],
911                    part: HeaderPart::Address(AddressPart::All),
912                    index_hdr: 0,
913                    index_part: -1,
914                })),
915            ),
916            (
917                "${header.from[-5].name[2]}",
918                Value::Variable(VariableType::Header(HeaderVariable {
919                    name: vec![HeaderName::From],
920                    part: HeaderPart::Address(AddressPart::Name),
921                    index_hdr: -5,
922                    index_part: 2,
923                })),
924            ),
925            (
926                "${header.from[*].raw[*]}",
927                Value::Variable(VariableType::Header(HeaderVariable {
928                    name: vec![HeaderName::From],
929                    part: HeaderPart::Raw,
930                    index_hdr: 0,
931                    index_part: 0,
932                })),
933            ),
934        ] {
935            assert_eq!(
936                compiler.tokenize_string(input.as_bytes(), true).unwrap(),
937                expected_result,
938                "Failed for {input}"
939            );
940        }
941
942        for input in ["${unicode:200000}", "${Unicode:DF01}"] {
943            assert!(compiler.tokenize_string(input.as_bytes(), true).is_err());
944        }
945    }
946}