boreal_parser/
rule.rs

1//! Parse yara rules.
2use std::ops::Range;
3
4use nom::branch::alt;
5use nom::character::complete::char;
6use nom::combinator::{cut, map, opt};
7use nom::multi::many1;
8use nom::sequence::{delimited, pair, preceded, separated_pair};
9use nom::Parser;
10
11use super::{
12    error::{Error, ErrorKind},
13    expression::{self, Expression},
14    hex_string,
15    nom_recipes::{map_res, rtrim, textual_tag as ttag},
16    number, regex,
17    regex::Regex,
18    string,
19    types::{Input, ParseResult, Position},
20};
21
22/// A Yara rule.
23#[derive(Clone, Debug, PartialEq)]
24pub struct Rule {
25    /// Name of the rule.
26    pub name: String,
27
28    /// Span for the rule name.
29    pub name_span: Range<usize>,
30
31    /// Tags associated with the rule.
32    pub tags: Vec<RuleTag>,
33
34    /// Metadata associated with the rule.
35    pub metadatas: Vec<Metadata>,
36
37    /// Variables associated with the rule.
38    ///
39    /// In Yara terms, those are "strings" (and they are declared
40    /// with the "strings:" declaration in a rule).
41    /// However, the "string" denomination is exceedingly confusing in the
42    /// implementation. Instead, name those "variables", as they are
43    /// declared with a prefix '$', which in multiple languages
44    /// indicates variables.
45    pub variables: Vec<VariableDeclaration>,
46
47    /// Condition of the rule.
48    pub condition: Expression,
49
50    /// Is the rule private.
51    pub is_private: bool,
52    /// Is the rule global.
53    pub is_global: bool,
54}
55
/// A single tag attached to a rule.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RuleTag {
    /// Name of the tag.
    pub tag: String,

    /// Span of the tag.
    pub span: Range<usize>,
}
65
/// The value stored under a metadata key.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MetadataValue {
    /// A byte string.
    Bytes(Vec<u8>),
    /// An integer.
    Integer(i64),
    /// A boolean.
    Boolean(bool),
}
76
77/// A metadata key-value, associated with a rule.
78#[derive(Clone, Debug, PartialEq, Eq)]
79pub struct Metadata {
80    /// Name of the metadata.
81    pub name: String,
82    /// Value of the metadata.
83    pub value: MetadataValue,
84}
85
86/// Value for a string associated with a rule.
87#[derive(Clone, Debug, PartialEq)]
88pub enum VariableDeclarationValue {
89    /// A raw byte string.
90    Bytes(Vec<u8>),
91    /// A regular expression.
92    Regex(Regex),
93    /// A hex string.
94    HexString(Vec<hex_string::Token>),
95}
96
97/// Modifiers applicable on a string.
98#[derive(Clone, Default, Debug, PartialEq, Eq)]
99// Completely useless lint
100#[allow(clippy::struct_excessive_bools)]
101pub struct VariableModifiers {
102    /// Wide modifier.
103    pub wide: bool,
104
105    /// Ascii modifier.
106    pub ascii: bool,
107
108    /// Nocase modifier.
109    pub nocase: bool,
110
111    /// Fullword modifier.
112    pub fullword: bool,
113
114    /// Private modifier.
115    pub private: bool,
116
117    /// Xor modifier, providing the range.
118    pub xor: Option<(u8, u8)>,
119
120    /// Base64 modifier.
121    pub base64: Option<VariableModifierBase64>,
122}
123
/// Details of the base64 modifier applied to a variable.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct VariableModifierBase64 {
    /// Wide version.
    pub wide: bool,

    /// Ascii version.
    pub ascii: bool,

    /// Alphabet to use to deserialize, when one was provided.
    pub alphabet: Option<[u8; 64]>,
}
136
137/// String declared in a rule.
138#[derive(Clone, Debug, PartialEq)]
139pub struct VariableDeclaration {
140    /// Name of the string.
141    pub name: String,
142    /// Value of the string.
143    pub value: VariableDeclarationValue,
144    /// Modifiers for the string.
145    pub modifiers: VariableModifiers,
146    /// Span for the whole declaration
147    pub span: Range<usize>,
148}
149
150/// Parse a rule
151///
152/// Related to the `rule` pattern in `grammar.y` in libyara.
153pub(crate) fn rule(mut input: Input) -> ParseResult<Rule> {
154    let mut is_private = false;
155    let mut is_global = false;
156
157    loop {
158        match rtrim(ttag("rule")).parse(input) {
159            Ok((i, _)) => {
160                input = i;
161                break;
162            }
163            Err(e) => {
164                if let Ok((i, _)) = rtrim(ttag("private")).parse(input) {
165                    input = i;
166                    is_private = true;
167                } else if let Ok((i, _)) = rtrim(ttag("global")).parse(input) {
168                    input = i;
169                    is_global = true;
170                } else {
171                    return Err(e);
172                }
173            }
174        }
175    }
176
177    map(
178        (
179            rule_name,
180            opt(tags),
181            delimited(
182                rtrim(char('{')),
183                (opt(meta), opt(strings), condition),
184                rtrim(char('}')),
185            ),
186        ),
187        move |((name, name_span), tags, (meta, strings, condition))| Rule {
188            name,
189            name_span,
190            tags: tags.unwrap_or_default(),
191            metadatas: meta.unwrap_or_default(),
192            variables: strings.unwrap_or_default(),
193            condition,
194            is_private,
195            is_global,
196        },
197    )
198    .parse(input)
199}
200
201fn rule_name(input: Input) -> ParseResult<(String, Range<usize>)> {
202    let start = input.pos();
203    let (input, name) = string::identifier(input)?;
204
205    Ok((input, (name, input.get_span_from(start))))
206}
207
208/// Parse a list of tags
209///
210/// This roughly parses `: identifier1 identifier2 ...`
211/// and returns a list of the identifiers.
212fn tags(input: Input) -> ParseResult<Vec<RuleTag>> {
213    let (input, _) = rtrim(char(':')).parse(input)?;
214
215    cut(many1(tag)).parse(input)
216}
217
218fn tag(input: Input) -> ParseResult<RuleTag> {
219    let start = input.pos();
220    let (input, tag) = string::identifier(input)?;
221
222    Ok((
223        input,
224        RuleTag {
225            tag,
226            span: input.get_span_from(start),
227        },
228    ))
229}
230
231/// Parse the "meta:" section in a rule.
232///
233/// Related to the `meta` and `meta_declarations` patterns
234/// in `grammar.y` in libyara.
235fn meta(input: Input) -> ParseResult<Vec<Metadata>> {
236    preceded(
237        pair(rtrim(ttag("meta")), rtrim(char(':'))),
238        cut(many1(meta_declaration)),
239    )
240    .parse(input)
241}
242
243/// Parse a single metadata declaration.
244///
245/// Related to the `meta_declaration` pattern in `grammar.y` in libyara.
246fn meta_declaration(input: Input) -> ParseResult<Metadata> {
247    map(
248        separated_pair(
249            string::identifier,
250            rtrim(char('=')),
251            alt((
252                map(string::quoted, MetadataValue::Bytes),
253                map(number::number, MetadataValue::Integer),
254                map(preceded(rtrim(char('-')), number::number), |v| {
255                    MetadataValue::Integer(-v)
256                }),
257                map(rtrim(ttag("true")), |_| MetadataValue::Boolean(true)),
258                map(rtrim(ttag("false")), |_| MetadataValue::Boolean(false)),
259            )),
260        ),
261        |(name, value)| Metadata { name, value },
262    )
263    .parse(input)
264}
265
266/// Parse the "strings:" section
267///
268/// Related to the `strings` and `strings_declarations` pattern
269/// in `grammar.y` in libyara.
270fn strings(input: Input) -> ParseResult<Vec<VariableDeclaration>> {
271    let (input, _) = pair(rtrim(ttag("strings")), rtrim(char(':'))).parse(input)?;
272    cut(many1(string_declaration)).parse(input)
273}
274
275/// Parse a single string declaration.
276///
277/// Related to the `string_declaration` pattern in `grammar.y` in libyara.
278fn string_declaration(input: Input) -> ParseResult<VariableDeclaration> {
279    let start = input.pos();
280
281    let (input, (name, (value, modifiers))) = separated_pair(
282        string::string_identifier,
283        cut(rtrim(char('='))),
284        cut(alt((
285            pair(
286                map(string::quoted, VariableDeclarationValue::Bytes),
287                string_modifiers,
288            ),
289            pair(
290                map(regex::regex, VariableDeclarationValue::Regex),
291                regex_modifiers,
292            ),
293            pair(
294                map(hex_string::hex_string, VariableDeclarationValue::HexString),
295                hex_string_modifiers,
296            ),
297        ))),
298    )
299    .parse(input)?;
300    Ok((
301        input,
302        VariableDeclaration {
303            name,
304            value,
305            modifiers,
306            span: input.get_span_from(start),
307        },
308    ))
309}
310
/// One modifier, as produced by a single modifier parser.
#[derive(Debug, Clone, PartialEq)]
enum Modifier {
    /// `wide`
    Wide,
    /// `ascii`
    Ascii,
    /// `nocase`
    Nocase,
    /// `fullword`
    Fullword,
    /// `private`
    Private,
    /// `xor`, with its `(from, to)` range.
    Xor(u8, u8),
    /// `base64` (`wide == false`) or `base64wide` (`wide == true`),
    /// with the alphabet when one was given.
    Base64 {
        wide: bool,
        alphabet: Option<[u8; 64]>,
    },
}
325
326fn modifiers_duplicated(modifier_name: &str, start: Position, input: Input) -> nom::Err<Error> {
327    nom::Err::Failure(Error::new(
328        input.get_span_from(start),
329        ErrorKind::ModifiersDuplicated {
330            modifier_name: modifier_name.to_string(),
331        },
332    ))
333}
334
335fn accumulate_modifiers<F>(parser: F, mut input: Input) -> ParseResult<VariableModifiers>
336where
337    F: Fn(Input) -> ParseResult<Modifier>,
338{
339    let mut modifiers = VariableModifiers::default();
340    let start = input.pos();
341    let mut parser = opt(parser);
342
343    while let (i, Some(modifier)) = parser.parse(input)? {
344        match modifier {
345            Modifier::Wide => {
346                if modifiers.wide {
347                    return Err(modifiers_duplicated("wide", input.pos(), i));
348                }
349                modifiers.wide = true;
350            }
351            Modifier::Ascii => {
352                if modifiers.ascii {
353                    return Err(modifiers_duplicated("ascii", input.pos(), i));
354                }
355                modifiers.ascii = true;
356            }
357            Modifier::Nocase => {
358                if modifiers.nocase {
359                    return Err(modifiers_duplicated("nocase", input.pos(), i));
360                }
361                modifiers.nocase = true;
362            }
363            Modifier::Fullword => {
364                if modifiers.fullword {
365                    return Err(modifiers_duplicated("fullword", input.pos(), i));
366                }
367                modifiers.fullword = true;
368            }
369            Modifier::Private => {
370                if modifiers.private {
371                    return Err(modifiers_duplicated("private", input.pos(), i));
372                }
373                modifiers.private = true;
374            }
375            Modifier::Xor(from, to) => {
376                if modifiers.xor.is_some() {
377                    return Err(modifiers_duplicated("xor", input.pos(), i));
378                }
379                modifiers.xor = Some((from, to));
380            }
381            Modifier::Base64 { wide, alphabet } => match &mut modifiers.base64 {
382                Some(base64) => {
383                    if wide && std::mem::replace(&mut base64.wide, true) {
384                        return Err(modifiers_duplicated("base64wide", input.pos(), i));
385                    } else if !wide && std::mem::replace(&mut base64.ascii, true) {
386                        return Err(modifiers_duplicated("base64", input.pos(), i));
387                    } else if alphabet != base64.alphabet {
388                        return Err(nom::Err::Failure(Error::new(
389                            i.get_span_from(input.pos()),
390                            ErrorKind::Base64AlphabetIncompatible,
391                        )));
392                    }
393                    base64.alphabet = alphabet;
394                }
395                None => {
396                    modifiers.base64 = Some(VariableModifierBase64 {
397                        ascii: !wide,
398                        wide,
399                        alphabet,
400                    });
401                }
402            },
403        }
404        input = i;
405    }
406
407    if let Err(kind) = validate_modifiers(&modifiers) {
408        return Err(nom::Err::Failure(Error::new(
409            input.get_span_from(start),
410            kind,
411        )));
412    }
413
414    Ok((input, modifiers))
415}
416
417fn validate_modifiers(modifiers: &VariableModifiers) -> Result<(), ErrorKind> {
418    if modifiers.xor.is_some() {
419        if modifiers.nocase {
420            return Err(ErrorKind::ModifiersIncompatible {
421                first_modifier_name: "xor".to_owned(),
422                second_modifier_name: "nocase".to_owned(),
423            });
424        }
425        if let Some(base64) = &modifiers.base64 {
426            return Err(ErrorKind::ModifiersIncompatible {
427                first_modifier_name: if base64.ascii { "base64" } else { "base64wide" }.to_owned(),
428                second_modifier_name: "xor".to_owned(),
429            });
430        }
431    }
432    if modifiers.nocase {
433        if let Some(base64) = &modifiers.base64 {
434            return Err(ErrorKind::ModifiersIncompatible {
435                first_modifier_name: if base64.ascii { "base64" } else { "base64wide" }.to_owned(),
436                second_modifier_name: "nocase".to_owned(),
437            });
438        }
439    }
440
441    if modifiers.fullword {
442        if let Some(base64) = &modifiers.base64 {
443            return Err(ErrorKind::ModifiersIncompatible {
444                first_modifier_name: if base64.ascii { "base64" } else { "base64wide" }.to_owned(),
445                second_modifier_name: "fullword".to_owned(),
446            });
447        }
448    }
449
450    Ok(())
451}
452
/// Parse the modifiers following a quoted string value.
///
/// Repeatedly applies `string_modifier` through `accumulate_modifiers`,
/// which also rejects duplicated or incompatible modifiers.
fn string_modifiers(input: Input) -> ParseResult<VariableModifiers> {
    accumulate_modifiers(string_modifier, input)
}
456
/// Parse the modifiers following a regex value.
///
/// Repeatedly applies `regex_modifier` through `accumulate_modifiers`,
/// which also rejects duplicated or incompatible modifiers.
fn regex_modifiers(input: Input) -> ParseResult<VariableModifiers> {
    accumulate_modifiers(regex_modifier, input)
}
460
/// Parse the modifiers following a hex string value.
///
/// Repeatedly applies `hex_string_modifier` through `accumulate_modifiers`,
/// which also rejects duplicated or incompatible modifiers.
fn hex_string_modifiers(input: Input) -> ParseResult<VariableModifiers> {
    accumulate_modifiers(hex_string_modifier, input)
}
464
465fn string_modifier(input: Input) -> ParseResult<Modifier> {
466    alt((
467        map(rtrim(ttag("wide")), |_| Modifier::Wide),
468        map(rtrim(ttag("ascii")), |_| Modifier::Ascii),
469        map(rtrim(ttag("nocase")), |_| Modifier::Nocase),
470        map(rtrim(ttag("fullword")), |_| Modifier::Fullword),
471        map(rtrim(ttag("private")), |_| Modifier::Private),
472        xor_modifier,
473        base64_modifier,
474    ))
475    .parse(input)
476}
477
478fn regex_modifier(input: Input) -> ParseResult<Modifier> {
479    rtrim(alt((
480        map(ttag("wide"), |_| Modifier::Wide),
481        map(ttag("ascii"), |_| Modifier::Ascii),
482        map(ttag("nocase"), |_| Modifier::Nocase),
483        map(ttag("fullword"), |_| Modifier::Fullword),
484        map(ttag("private"), |_| Modifier::Private),
485    )))
486    .parse(input)
487}
488
489fn hex_string_modifier(input: Input) -> ParseResult<Modifier> {
490    map(rtrim(ttag("private")), |_| Modifier::Private).parse(input)
491}
492
493/// Parse a XOR modifier, ie:
494/// - `'xor'`
495/// - `'xor' '(' number ')'`
496/// - `'xor' '(' number '-' number ')'`
497fn xor_modifier(input: Input) -> ParseResult<Modifier> {
498    let (input, _) = rtrim(ttag("xor")).parse(input)?;
499
500    let start = input.pos();
501    let (input, open_paren) = opt(rtrim(char('('))).parse(input)?;
502    if open_paren.is_none() {
503        return Ok((input, Modifier::Xor(0, 255)));
504    }
505
506    let (input, from) = cut(map_res(number::number, number_to_u8)).parse(input)?;
507
508    let (input, to) = match rtrim(char('-')).parse(input) {
509        Ok((input, _)) => cut(map_res(number::number, number_to_u8)).parse(input)?,
510        Err(_) => (input, from),
511    };
512
513    let (input, _) = cut(rtrim(char(')'))).parse(input)?;
514
515    if to < from {
516        Err(nom::Err::Failure(Error::new(
517            input.get_span_from(start),
518            ErrorKind::XorRangeInvalid { from, to },
519        )))
520    } else {
521        Ok((input, Modifier::Xor(from, to)))
522    }
523}
524
525/// Parse a base64 modifier, ie:
526/// - `'base64(wide)'`
527/// - `'base64(wide)' '(' string ')'`
528fn base64_modifier(input: Input) -> ParseResult<Modifier> {
529    let (input, wide) = rtrim(alt((
530        map(ttag("base64"), |_| false),
531        map(ttag("base64wide"), |_| true),
532    )))
533    .parse(input)?;
534
535    let (mut input, open_paren) = opt(rtrim(char('('))).parse(input)?;
536
537    let mut alphabet: Option<[u8; 64]> = None;
538    if open_paren.is_some() {
539        let start = input.pos();
540        let (input2, val) = cut(string::quoted).parse(input)?;
541        let length = val.len();
542        match val.try_into() {
543            Ok(v) => alphabet = Some(v),
544            Err(_) => {
545                return Err(nom::Err::Failure(Error::new(
546                    input2.get_span_from(start),
547                    ErrorKind::Base64AlphabetInvalidLength { length },
548                )));
549            }
550        }
551        let (input2, _) = cut(rtrim(char(')'))).parse(input2)?;
552        input = input2;
553    }
554
555    Ok((input, Modifier::Base64 { wide, alphabet }))
556}
557
558fn number_to_u8(value: i64) -> Result<u8, ErrorKind> {
559    u8::try_from(value).map_err(|_| ErrorKind::XorRangeInvalidValue { value })
560}
561
562/// Parse a condition
563///
564/// Related to the `condition` pattern in `grammar.y` in libyara.
565fn condition(input: Input) -> ParseResult<Expression> {
566    let (input, _) = rtrim(ttag("condition")).parse(input)?;
567    cut(preceded(rtrim(char(':')), expression::expression)).parse(input)
568}
569
570#[cfg(test)]
571mod tests {
572    use crate::expression::{ExpressionKind, ForSelection, VariableSet};
573    use crate::hex_string::{Mask, Token};
574    use crate::regex::Literal;
575    use crate::test_helpers::test_public_type;
576
577    use super::super::test_helpers::{parse, parse_err};
578    use super::*;
579
580    #[test]
581    fn parse_tags() {
582        parse(
583            tags,
584            ": a _ a8 {",
585            "{",
586            vec![
587                RuleTag {
588                    tag: "a".to_owned(),
589                    span: 2..3,
590                },
591                RuleTag {
592                    tag: "_".to_owned(),
593                    span: 4..5,
594                },
595                RuleTag {
596                    tag: "a8".to_owned(),
597                    span: 6..8,
598                },
599            ],
600        );
601        parse(
602            tags,
603            ": b 8",
604            "8",
605            vec![RuleTag {
606                tag: "b".to_owned(),
607                span: 2..3,
608            }],
609        );
610
611        parse_err(tags, "");
612        parse_err(tags, ":");
613        parse_err(tags, ": {");
614    }
615
616    #[test]
617    fn parse_meta() {
618        parse(
619            meta,
620            "meta : a = 3 b =-4 _=true d",
621            "d",
622            vec![
623                Metadata {
624                    name: "a".to_owned(),
625                    value: MetadataValue::Integer(3),
626                },
627                Metadata {
628                    name: "b".to_owned(),
629                    value: MetadataValue::Integer(-4),
630                },
631                Metadata {
632                    name: "_".to_owned(),
633                    value: MetadataValue::Boolean(true),
634                },
635            ],
636        );
637        parse(
638            meta,
639            "meta:\n  a = \" a\rb \"  \n  b= false \n  strings",
640            "strings",
641            vec![
642                Metadata {
643                    name: "a".to_owned(),
644                    value: MetadataValue::Bytes(b" a\rb ".to_vec()),
645                },
646                Metadata {
647                    name: "b".to_owned(),
648                    value: MetadataValue::Boolean(false),
649                },
650            ],
651        );
652        parse(
653            meta,
654            "meta: a = false test = True",
655            "test = True",
656            vec![Metadata {
657                name: "a".to_owned(),
658                value: MetadataValue::Boolean(false),
659            }],
660        );
661        parse(
662            meta,
663            "meta: a = \"\" d",
664            "d",
665            vec![Metadata {
666                name: "a".to_owned(),
667                value: MetadataValue::Bytes(Vec::new()),
668            }],
669        );
670
671        parse_err(meta, "");
672        parse_err(meta, "meta");
673        parse_err(meta, "meta:");
674    }
675
676    #[test]
677    fn parse_modifiers() {
678        parse(
679            string_modifiers,
680            "private wide ascii xor Xor",
681            "Xor",
682            VariableModifiers {
683                wide: true,
684                ascii: true,
685                nocase: false,
686                fullword: false,
687                private: true,
688                xor: Some((0, 255)),
689                base64: None,
690            },
691        );
692        parse(
693            string_modifiers,
694            "nocase fullword",
695            "",
696            VariableModifiers {
697                wide: false,
698                ascii: false,
699                nocase: true,
700                fullword: true,
701                private: false,
702                xor: None,
703                base64: None,
704            },
705        );
706        parse(
707            string_modifiers,
708            "base64wide ascii",
709            "",
710            VariableModifiers {
711                wide: false,
712                ascii: true,
713                nocase: false,
714                fullword: false,
715                private: false,
716                xor: None,
717                base64: Some(VariableModifierBase64 {
718                    wide: true,
719                    ascii: false,
720                    alphabet: None,
721                }),
722            },
723        );
724
725        parse(
726            string_modifiers,
727            "xor ( 15 )",
728            "",
729            VariableModifiers {
730                wide: false,
731                ascii: false,
732                nocase: false,
733                fullword: false,
734                private: false,
735                xor: Some((15, 15)),
736                base64: None,
737            },
738        );
739        parse(
740            string_modifiers,
741            "xor (50 - 120) private",
742            "",
743            VariableModifiers {
744                wide: false,
745                ascii: false,
746                nocase: false,
747                fullword: false,
748                private: true,
749                xor: Some((50, 120)),
750                base64: None,
751            },
752        );
753
754        let alphabet = "!@#$%^&*(){}[].,|ABCDEFGHIJ\x09LMNOPQRSTUVWXYZabcdefghijklmnopqrstu";
755        let alphabet_array: [u8; 64] = alphabet.as_bytes().try_into().unwrap();
756        parse(
757            string_modifiers,
758            &format!("base64( \"{alphabet}\" )"),
759            "",
760            VariableModifiers {
761                wide: false,
762                ascii: false,
763                nocase: false,
764                fullword: false,
765                private: false,
766                xor: None,
767                base64: Some(VariableModifierBase64 {
768                    wide: false,
769                    ascii: true,
770                    alphabet: Some(alphabet_array),
771                }),
772            },
773        );
774        parse(
775            string_modifiers,
776            &format!("base64wide ( \"{alphabet}\" ) private"),
777            "",
778            VariableModifiers {
779                wide: false,
780                ascii: false,
781                nocase: false,
782                fullword: false,
783                private: true,
784                xor: None,
785                base64: Some(VariableModifierBase64 {
786                    wide: true,
787                    ascii: false,
788                    alphabet: Some(alphabet_array),
789                }),
790            },
791        );
792        parse(
793            string_modifiers,
794            &format!("base64wide ( \"{alphabet}\" ) base64 (\"{alphabet}\")"),
795            "",
796            VariableModifiers {
797                wide: false,
798                ascii: false,
799                nocase: false,
800                fullword: false,
801                private: false,
802                xor: None,
803                base64: Some(VariableModifierBase64 {
804                    wide: true,
805                    ascii: true,
806                    alphabet: Some(alphabet_array),
807                }),
808            },
809        );
810
811        parse(
812            regex_modifiers,
813            "private wide ascii nocase fullword base64",
814            "base64",
815            VariableModifiers {
816                wide: true,
817                ascii: true,
818                nocase: true,
819                fullword: true,
820                private: true,
821                xor: None,
822                base64: None,
823            },
824        );
825
826        parse(
827            hex_string_modifiers,
828            "private wide",
829            "wide",
830            VariableModifiers {
831                wide: false,
832                ascii: false,
833                nocase: false,
834                fullword: false,
835                private: true,
836                xor: None,
837                base64: None,
838            },
839        );
840
841        parse_err(string_modifier, "");
842        parse_err(string_modifier, "w");
843
844        parse_err(regex_modifier, "");
845        parse_err(regex_modifier, "w");
846        parse_err(regex_modifier, "base64");
847        parse_err(regex_modifier, "base64wide");
848        parse_err(regex_modifier, "xor");
849
850        parse_err(hex_string_modifier, "");
851        parse_err(hex_string_modifier, "w");
852        parse_err(hex_string_modifier, "ascii");
853        parse_err(hex_string_modifier, "wide");
854        parse_err(hex_string_modifier, "nocase");
855        parse_err(hex_string_modifier, "fullword");
856        parse_err(hex_string_modifier, "base64");
857        parse_err(hex_string_modifier, "base64wide");
858        parse_err(hex_string_modifier, "xor");
859    }
860
861    #[test]
862    fn test_flags_validation() {
863        parse_err(string_modifiers, "xor nocase");
864        parse_err(string_modifiers, "base64 nocase");
865        parse_err(string_modifiers, "nocase base64wide");
866        parse_err(string_modifiers, "fullword base64");
867        parse_err(string_modifiers, "base64wide fullword");
868        parse_err(string_modifiers, "xor xor");
869        parse_err(string_modifiers, "xor(300)");
870        parse_err(string_modifiers, "xor base64");
871        parse_err(string_modifiers, "xor base64wide");
872    }
873
874    #[test]
875    fn test_err_accumulate_modifiers() {
876        let alphabet = "!@#$%^&*(){}[].,|ABCDEFGHIJ\x09LMNOPQRSTUVWXYZabcdefghijklmnopqrstu";
877        let alphabet2 = "!@#$%^&*(){}[].,|BADCFEHGJI\x09LMNOPQRSTUVWXYZabcdefghijklmnopqrstu";
878
879        parse_err(string_modifiers, "xor xor");
880        parse_err(string_modifiers, "base64 base64");
881        parse_err(string_modifiers, "base64wide base64wide");
882        parse_err(string_modifiers, "fullword fullword");
883        parse_err(string_modifiers, "private private");
884        parse_err(string_modifiers, "wide wide");
885        parse_err(string_modifiers, "ascii ascii");
886        parse_err(string_modifiers, "nocase nocase");
887
888        parse_err(regex_modifiers, "fullword fullword");
889        parse_err(regex_modifiers, "private private");
890        parse_err(regex_modifiers, "wide wide");
891        parse_err(regex_modifiers, "ascii ascii");
892        parse_err(regex_modifiers, "nocase nocase");
893
894        parse_err(hex_string_modifiers, "private private");
895
896        parse_err(
897            string_modifiers,
898            &format!(r#"base64 base64wide("{alphabet}")"#),
899        );
900        parse_err(
901            string_modifiers,
902            &format!(r#"base64("{alphabet}") base64wide"#),
903        );
904        parse_err(
905            string_modifiers,
906            &format!(r#"base64wide("{alphabet}") base64"#),
907        );
908        parse_err(
909            string_modifiers,
910            &format!(r#"base64("{alphabet}") base64wide("{alphabet2}")"#),
911        );
912    }
913
914    #[test]
915    fn parse_strings() {
916        parse(
917            strings,
918            "strings : $a = \"b\td\" xor ascii \n  $b= /a?b/  $= { ?B} private d",
919            "d",
920            [
921                VariableDeclaration {
922                    name: "a".to_owned(),
923                    value: VariableDeclarationValue::Bytes(b"b\td".to_vec()),
924                    modifiers: VariableModifiers {
925                        ascii: true,
926                        xor: Some((0, 255)),
927                        ..VariableModifiers::default()
928                    },
929                    span: 10..30,
930                },
931                VariableDeclaration {
932                    name: "b".to_owned(),
933                    value: VariableDeclarationValue::Regex(Regex {
934                        ast: regex::Node::Concat(vec![
935                            regex::Node::Repetition {
936                                node: Box::new(regex::Node::Literal(Literal {
937                                    byte: b'a',
938                                    span: 39..40,
939                                    escaped: false,
940                                })),
941                                kind: regex::RepetitionKind::ZeroOrOne,
942                                greedy: true,
943                            },
944                            regex::Node::Literal(Literal {
945                                byte: b'b',
946                                span: 41..42,
947                                escaped: false,
948                            }),
949                        ]),
950                        case_insensitive: false,
951                        dot_all: false,
952                        span: 38..43,
953                    }),
954                    modifiers: VariableModifiers {
955                        ..VariableModifiers::default()
956                    },
957                    span: 34..43,
958                },
959                VariableDeclaration {
960                    name: String::new(),
961                    value: VariableDeclarationValue::HexString(vec![Token::MaskedByte(
962                        0x0B,
963                        Mask::Left,
964                    )]),
965                    modifiers: VariableModifiers {
966                        private: true,
967                        ..VariableModifiers::default()
968                    },
969                    span: 45..61,
970                },
971            ],
972        );
973
974        parse_err(strings, "");
975        parse_err(strings, "strings");
976        parse_err(strings, "strings:");
977    }
978
// Exercise the `rule` parser on well-formed rules (no flag, `private`,
// `global`, both flags in either order, tags and every section), then on
// inputs handed to `parse_err`.
#[test]
fn parse_rule() {
    // Expected value for a rule whose body is only `condition: false`.
    let bare_rule = |name: &str, name_span, condition_span, is_private, is_global| Rule {
        name: name.to_owned(),
        name_span,
        tags: Vec::new(),
        metadatas: Vec::new(),
        variables: Vec::new(),
        condition: Expression {
            expr: ExpressionKind::Boolean(false),
            span: condition_span,
        },
        is_private,
        is_global,
    };

    parse(
        rule,
        "rule a { condition: false }",
        "",
        bare_rule("a", 5..6, 20..25, false, false),
    );

    // Both flags, two tags, and the meta / strings / condition sections.
    let tags = vec![
        RuleTag {
            tag: "tag1".to_owned(),
            span: 24..28,
        },
        RuleTag {
            tag: "tag2".to_owned(),
            span: 29..33,
        },
    ];
    let metadatas = vec![Metadata {
        name: "a".to_owned(),
        value: MetadataValue::Boolean(true),
    }];
    let variables = vec![VariableDeclaration {
        name: "b".to_owned(),
        value: VariableDeclarationValue::Bytes(b"t".to_vec()),
        modifiers: VariableModifiers::default(),
        span: 60..68,
    }];
    let condition = Expression {
        expr: ExpressionKind::For {
            selection: ForSelection::All,
            set: VariableSet { elements: vec![] },
            body: None,
        },
        span: 80..91,
    };
    parse(
        rule,
        "private global rule b : tag1 tag2 { meta: a = true strings: $b = \"t\" condition: all of them }",
        "",
        Rule {
            name: "b".to_owned(),
            name_span: 20..21,
            tags,
            metadatas,
            variables,
            condition,
            is_private: true,
            is_global: true,
        },
    );

    // Remaining flag permutations: input, name span, condition span, flags.
    for (input, name_span, condition_span, is_private, is_global) in [
        (
            "global private rule c { condition: false }",
            20..21,
            35..40,
            true,
            true,
        ),
        ("private rule c { condition: false }", 13..14, 28..33, true, false),
        ("global rule c { condition: false }", 12..13, 27..32, false, true),
    ] {
        parse(
            rule,
            input,
            "",
            bare_rule("c", name_span, condition_span, is_private, is_global),
        );
    }

    // Inputs the parser is expected to reject: missing name, missing or
    // malformed condition, `strings` before `meta`, missing closing brace.
    for invalid in [
        "",
        "rule",
        "rule {}",
        "rule a {}",
        "rule b { condition true }",
        "rule c { strings: $a = /a/ meta: a = 3 condition: true }",
        "rule d { condition: true",
    ] {
        parse_err(rule, invalid);
    }
}
1097
// Test that we use textual tags: every input below glues a keyword to the
// characters that follow it, and is handed to `parse_err` for the
// corresponding parser.
#[test]
fn test_tags() {
    for input in [
        "rulea{condition:true}",
        "privaterule a{condition:true}",
        "globalrule a{condition:true}",
    ] {
        parse_err(rule, input);
    }

    for input in ["meta: a=trueb=false", "meta: a=falseb=true"] {
        parse_err(meta, input);
    }

    for input in [
        "widexor",
        "asciixor",
        "nocasexor",
        "fullwordxor",
        "privatexor",
        "xorwide",
        "base64xor",
        "base64widexor",
    ] {
        parse_err(string_modifier, input);
    }

    for input in [
        "widexor",
        "asciixor",
        "nocasexor",
        "fullwordxor",
        "privatexor",
    ] {
        parse_err(regex_modifier, input);
    }

    parse_err(hex_string_modifier, "privatexor");
}
1125
// Check `xor_modifier`: a bare `xor` yields the range 0..=255, a single value
// yields that value for both bounds, and an explicit `a-b` range yields
// `(a, b)`. The inputs in the second table are handed to `parse_err`.
#[test]
fn parse_xor_modifier() {
    // (input, expected remaining input, expected modifier)
    for (input, rest, expected) in [
        ("xor a", "a", Modifier::Xor(0, 255)),
        ("xor(23)", "", Modifier::Xor(23, 23)),
        ("xor ( 12 -15 )b", "b", Modifier::Xor(12, 15)),
    ] {
        parse(xor_modifier, input, rest, expected);
    }

    for invalid in [
        "",
        "xora",
        "xor(",
        "xor(//",
        "xor(13",
        "xor()",
        "xor(-1)",
        "xor(256)",
        "xor(50-4)",
        "xor(0-256)",
    ] {
        parse_err(xor_modifier, invalid);
    }
}
1143
// Check `base64_modifier`: `base64` and `base64wide`, each with and without
// a custom 64-byte alphabet, followed by inputs handed to `parse_err`.
#[test]
fn parse_base64_modifier() {
    let alphabet = "!@#$%^&*(){}[].,|ABCDEFGHIJ\x09LMNOPQRSTUVWXYZabcdefghijklmnopqrstu";
    let alphabet_array: [u8; 64] = alphabet.as_bytes().try_into().unwrap();

    // Builds the modifier expected from a successful parse.
    let expected = |wide, custom| Modifier::Base64 {
        wide,
        alphabet: custom,
    };

    // Default alphabet.
    parse(base64_modifier, "base64 a", "a", expected(false, None));
    parse(base64_modifier, "base64wide a", "a", expected(true, None));

    // Custom alphabet, with and without whitespace around the parenthesis.
    let narrow_input = format!(r#"base64("{alphabet}")"#);
    parse(
        base64_modifier,
        &narrow_input,
        "",
        expected(false, Some(alphabet_array)),
    );
    let wide_input = format!(r#"base64wide ( "{alphabet}")b"#);
    parse(
        base64_modifier,
        &wide_input,
        "b",
        expected(true, Some(alphabet_array)),
    );

    // Same alphabet but without the closing parenthesis.
    let unterminated = format!(r#"base64("{alphabet}""#);
    for invalid in [
        "",
        "base64a",
        "base64widea",
        "base64(",
        "base64wide(",
        "base64wide(//",
        unterminated.as_str(),
        "base64(\"123\")",
        "base64wide(15)",
    ] {
        parse_err(base64_modifier, invalid);
    }
}
1196
// Parse a rule that uses tags, metadata, and hex / text / regex strings with
// modifiers, as well as a standalone string modifier, and pass each parse
// result to `test_public_type`.
#[test]
fn test_public_types() {
    let source = r#"private rule a : tag {
    meta:
        a = "a"
        b = 2
        c = true
    strings:
        $a = { 01 }
        $b = "02" xor(15-30)
        $c = "02" base64("!@#$%^&*(){}[].,|ABCDEFGHIJ\x09LMNOPQRSTUVWXYZabcdefghijklmnopqrstu")
        $d = /ab/ wide
    condition:
      any of them
}
"#;
    let parsed_rule = rule(Input::new(source)).unwrap();
    test_public_type(parsed_rule);

    let parsed_modifier = string_modifier(Input::new("wide")).unwrap();
    test_public_type(parsed_modifier);
}
1221}