yara_x/compiler/
rules.rs

1use std::fmt;
2use std::io::{BufWriter, Read, Write};
3use std::ops::{Bound, RangeBounds};
4use std::slice::Iter;
5#[cfg(feature = "logging")]
6use std::time::Instant;
7
8use aho_corasick::AhoCorasick;
9#[cfg(feature = "logging")]
10use log::*;
11use regex_automata::meta::Regex;
12use rustc_hash::FxHashMap;
13use serde::{Deserialize, Deserializer, Serialize, Serializer};
14
15use crate::compiler::atoms::Atom;
16use crate::compiler::errors::SerializationError;
17use crate::compiler::report::CodeLoc;
18use crate::compiler::warnings::Warning;
19use crate::compiler::{
20    IdentId, Imports, LiteralId, NamespaceId, PatternId, RegexpId, RuleId,
21    SubPattern, SubPatternId,
22};
23use crate::models::PatternKind;
24use crate::re::{BckCodeLoc, FwdCodeLoc, RegexpAtom};
25use crate::string_pool::{BStringPool, StringPool};
26use crate::{re, types, wasm, Rule};
27
/// A set of YARA rules in compiled form.
///
/// This is the result from [`crate::Compiler::build`].
#[derive(Serialize, Deserialize)]
pub struct Rules {
    /// Pool with identifiers used in the rules. Each identifier has its
    /// own [`IdentId`], which can be used for retrieving the identifier
    /// from the pool as a `&str`.
    pub(in crate::compiler) ident_pool: StringPool<IdentId>,

    /// Pool with the regular expressions used in the rules' conditions. Each
    /// regular expression has its own [`RegexpId`]. Regular expressions
    /// include the starting and ending slashes (`/`), and the modifiers
    /// `i` and `s` if present (e.g.: `/foobar/`, `/foo/i`, `/bar/s`).
    pub(in crate::compiler) regexp_pool: StringPool<RegexpId>,

    /// If `true`, the regular expressions in `regexp_pool` are allowed to
    /// contain invalid escape sequences.
    pub(in crate::compiler) relaxed_re_syntax: bool,

    /// Pool with literal strings used in the rules. Each literal has its
    /// own [`LiteralId`], which can be used for retrieving the literal
    /// string as `&BStr`.
    pub(in crate::compiler) lit_pool: BStringPool<LiteralId>,

    /// WASM module in raw form.
    pub(in crate::compiler) wasm_mod: Vec<u8>,

    /// WASM module already compiled into native code for the current platform.
    /// When the rules are serialized, the compiled module is included only if
    /// the `native-code-serialization` feature is enabled.
    #[serde(
        serialize_with = "serialize_wasm_mod",
        deserialize_with = "deserialize_wasm_mod"
    )]
    pub(in crate::compiler) compiled_wasm_mod: Option<wasmtime::Module>,

    /// Vector with the names of all the imported modules. The vector contains
    /// the [`IdentId`] corresponding to the module's identifier.
    pub(in crate::compiler) imported_modules: Vec<IdentId>,

    /// Vector containing all the compiled rules. A [`RuleId`] is an index
    /// in this vector.
    pub(in crate::compiler) rules: Vec<RuleInfo>,

    /// Total number of patterns across all rules. This is equal to the last
    /// [`PatternId`] + 1.
    pub(in crate::compiler) num_patterns: usize,

    /// Vector with all the sub-patterns from all rules. A [`SubPatternId`]
    /// is an index in this vector. Each pattern is composed of one or more
    /// sub-patterns; if any of the sub-patterns matches, the pattern matches.
    ///
    /// For example, when a text pattern is accompanied by both the `ascii`
    /// and `wide` modifiers, two sub-patterns are generated for it: one for
    /// the ascii variant, and the other for the wide variant.
    ///
    /// Each sub-pattern in this vector is accompanied by the [`PatternId`]
    /// of the pattern the sub-pattern belongs to.
    pub(in crate::compiler) sub_patterns: Vec<(PatternId, SubPattern)>,

    /// Map that associates a `PatternId` to a certain file size bound.
    ///
    /// A condition like `filesize < 1000 and $a` only matches if `filesize`
    /// is less than 1000. Therefore, the pattern `$a` does not need to be
    /// checked for files of size 1000 bytes or larger.
    ///
    /// In this case, the map will contain an entry associating `$a` to a
    /// `FilesizeBounds` value like:
    ///
    /// `FilesizeBounds{start: Bound::Unbounded, end: Bound::Excluded(1000)}`.
    pub(in crate::compiler) filesize_bounds:
        FxHashMap<PatternId, FilesizeBounds>,

    /// Vector that contains the [`SubPatternId`] for sub-patterns that can
    /// match only at a fixed offset within the scanned data. These sub-patterns
    /// are not added to the Aho-Corasick automaton.
    pub(in crate::compiler) anchored_sub_patterns: Vec<SubPatternId>,

    /// A vector that contains all the atoms extracted from the patterns. Each
    /// atom has an associated [`SubPatternId`] that indicates the sub-pattern
    /// it belongs to.
    pub(in crate::compiler) atoms: Vec<SubPatternAtom>,

    /// A vector that contains the code for all regexp patterns (this includes
    /// hex patterns which are just a special case of regexp). The code for
    /// each regexp is appended to the vector during the compilation process,
    /// and the atoms extracted from the regexp contain offsets within this
    /// vector. This vector contains both forward and backward code.
    pub(in crate::compiler) re_code: Vec<u8>,

    /// A [`types::Struct`] in serialized form that contains all the global
    /// variables. Each field in the structure corresponds to a global variable
    /// defined at compile time using [`crate::compiler::Compiler`].
    pub(in crate::compiler) serialized_globals: Vec<u8>,

    /// Aho-Corasick automaton containing the atoms extracted from the patterns.
    /// This allows searching for all the atoms in the scanned data at the same
    /// time in an efficient manner. The automaton is not serialized when
    /// [`Rules::serialize`] is called. It needs to be wrapped in [`Option`] so
    /// that we can use `#[serde(skip)]` on it, because [`AhoCorasick`] doesn't
    /// implement the [`Default`] trait.
    #[serde(skip)]
    pub(in crate::compiler) ac: Option<AhoCorasick>,

    /// Warnings that were produced while compiling these rules. These warnings
    /// are not serialized; rules that are obtained by deserializing previously
    /// serialized rules won't have any warnings.
    #[serde(skip)]
    pub(in crate::compiler) warnings: Vec<Warning>,
}
139
140impl Rules {
141    /// An iterator that yields the name of the modules imported by the
142    /// rules.
143    pub fn imports(&self) -> Imports<'_> {
144        Imports {
145            iter: self.imported_modules.iter(),
146            ident_pool: &self.ident_pool,
147        }
148    }
149
150    /// Warnings produced while compiling these rules.
151    pub fn warnings(&self) -> &[Warning] {
152        self.warnings.as_slice()
153    }
154
155    /// Serializes the rules as a sequence of bytes.
156    ///
157    /// The [`Rules`] can be restored back by passing the bytes to
158    /// [`Rules::deserialize`].
159    pub fn serialize(&self) -> Result<Vec<u8>, SerializationError> {
160        let mut bytes = Vec::new();
161        self.serialize_into(&mut bytes)?;
162        Ok(bytes)
163    }
164
165    /// Deserializes the rules from a sequence of bytes produced by
166    /// [`Rules::serialize`].
167    pub fn deserialize<B>(bytes: B) -> Result<Self, SerializationError>
168    where
169        B: AsRef<[u8]>,
170    {
171        let bytes = bytes.as_ref();
172        let magic = b"YARA-X";
173
174        if bytes.len() < magic.len() || &bytes[0..magic.len()] != magic {
175            return Err(SerializationError::InvalidFormat);
176        }
177
178        #[cfg(feature = "logging")]
179        let start = Instant::now();
180
181        // Skip the magic and deserialize the remaining data.
182        let (mut rules, _len): (Self, usize) =
183            bincode::serde::decode_from_slice(
184                &bytes[magic.len()..],
185                bincode::config::standard(),
186            )?;
187
188        #[cfg(feature = "logging")]
189        info!("Deserialization time: {:?}", Instant::elapsed(&start));
190
191        // `rules.compiled_wasm_mod` can be `None` for two reasons:
192        //
193        //  1- The rules were serialized without compiled rules (i.e: the
194        //     `native-code-serialization` feature was disabled, which is
195        //     the default).
196        //
197        //  2- The rules were serialized with compiled rules, but they were
198        //     compiled for a different platform, and `deserialize_wasm_mod`
199        //     returned `None`.
200        //
201        // In both cases we try to build the module again from the data in
202        // `rules.wasm_mode`.
203        if rules.compiled_wasm_mod.is_none() {
204            #[cfg(feature = "logging")]
205            let start = Instant::now();
206
207            rules.compiled_wasm_mod = Some(wasmtime::Module::from_binary(
208                wasm::get_engine(),
209                rules.wasm_mod.as_slice(),
210            )?);
211
212            #[cfg(feature = "logging")]
213            info!("WASM build time: {:?}", Instant::elapsed(&start));
214        }
215
216        rules.build_ac_automaton();
217
218        Ok(rules)
219    }
220
221    /// Serializes the rules into a `writer`.
222    pub fn serialize_into<W>(
223        &self,
224        writer: W,
225    ) -> Result<(), SerializationError>
226    where
227        W: Write,
228    {
229        let mut writer = BufWriter::new(writer);
230
231        // Write file header.
232        writer.write_all(b"YARA-X")?;
233
234        bincode::serde::encode_into_std_write(
235            self,
236            &mut writer,
237            bincode::config::standard(),
238        )?;
239
240        Ok(())
241    }
242
243    /// Deserializes the rules from a `reader`.
244    pub fn deserialize_from<R>(
245        mut reader: R,
246    ) -> Result<Self, SerializationError>
247    where
248        R: Read,
249    {
250        let mut bytes = Vec::new();
251        let _ = reader.read_to_end(&mut bytes)?;
252        Self::deserialize(bytes)
253    }
254
255    /// Returns an iterator that yields the compiled rules.
256    ///
257    /// ```rust
258    /// # use yara_x::Compiler;
259    /// let mut compiler = Compiler::new();
260    ///
261    /// assert!(compiler
262    ///     .add_source("rule foo {condition: true}")
263    ///     .unwrap()
264    ///     .add_source("rule bar {condition: true}")
265    ///     .is_ok());
266    ///
267    /// let rules = compiler.build();
268    /// let mut iter = rules.iter();
269    ///
270    /// assert_eq!(iter.len(), 2);
271    /// assert_eq!(iter.next().map(|r| r.identifier()), Some("foo"));
272    /// assert_eq!(iter.next().map(|r| r.identifier()), Some("bar"));
273    /// ```
274    pub fn iter(&self) -> RulesIter<'_> {
275        RulesIter { rules: self, iterator: self.rules.iter() }
276    }
277
278    /// Returns a [`RuleInfo`] given its [`RuleId`].
279    ///
280    /// # Panics
281    ///
282    /// If no rule with such [`RuleId`] exists.
283    pub(crate) fn get(&self, rule_id: RuleId) -> &RuleInfo {
284        self.rules.get(rule_id.0 as usize).unwrap()
285    }
286
287    /// Returns a regular expression by [`RegexpId`].
288    ///
289    /// # Panics
290    ///
291    /// If no regular expression with such [`RegexpId`] exists.
292    #[inline]
293    pub(crate) fn get_regexp(&self, regexp_id: RegexpId) -> Regex {
294        let re = types::Regexp::new(self.regexp_pool.get(regexp_id).unwrap());
295
296        let parser = re::parser::Parser::new()
297            .relaxed_re_syntax(self.relaxed_re_syntax);
298
299        let hir = parser.parse(&re).unwrap().into_inner();
300
301        // Set a size limit for the NFA automata. The default limit (10MB) is
302        // too small for certain regexps seen in YARA rules in the wild, see:
303        // https://github.com/VirusTotal/yara-x/issues/85
304        let config = regex_automata::meta::Config::new()
305            .nfa_size_limit(Some(50 * 1024 * 1024));
306
307        regex_automata::meta::Builder::new()
308            .configure(config)
309            .build_from_hir(&hir)
310            .unwrap_or_else(|err| {
311                panic!("error compiling regex `{}`: {:#?}", re.as_str(), err)
312            })
313    }
314
315    /// Returns a sub-pattern by [`SubPatternId`].
316    #[inline]
317    pub(crate) fn get_sub_pattern(
318        &self,
319        sub_pattern_id: SubPatternId,
320    ) -> &(PatternId, SubPattern) {
321        unsafe { self.sub_patterns.get_unchecked(sub_pattern_id.0 as usize) }
322    }
323
324    /// Given a [`SubPatternId`], returns the [`RuleId`] corresponding to the
325    /// rule that contains the sub-pattern, and the [`IdentId`] for the pattern's
326    /// identifier.
327    ///
328    /// This operation is slow, because it implies iterating over all the rules
329    /// and their sub-patterns until finding the one we are looking for.
330    #[cfg(feature = "logging")]
331    pub(crate) fn get_rule_and_pattern_by_sub_pattern_id(
332        &self,
333        sub_pattern_id: SubPatternId,
334    ) -> Option<(RuleId, IdentId)> {
335        let (target_pattern_id, _) = self.get_sub_pattern(sub_pattern_id);
336        for (rule_id, rule) in self.rules.iter().enumerate() {
337            for p in &rule.patterns {
338                if p.pattern_id == *target_pattern_id {
339                    return Some((rule_id.into(), p.ident_id));
340                };
341            }
342        }
343        None
344    }
345
346    #[cfg(feature = "rules-profiling")]
347    #[inline]
348    pub(crate) fn rules(&self) -> &[RuleInfo] {
349        self.rules.as_slice()
350    }
351
352    #[inline]
353    pub(crate) fn atoms(&self) -> &[SubPatternAtom] {
354        self.atoms.as_slice()
355    }
356
357    #[inline]
358    pub(crate) fn anchored_sub_patterns(&self) -> &[SubPatternId] {
359        self.anchored_sub_patterns.as_slice()
360    }
361
362    #[inline]
363    pub(crate) fn re_code(&self) -> &[u8] {
364        self.re_code.as_slice()
365    }
366
    /// Returns the number of compiled rules.
    #[inline]
    pub(crate) fn num_rules(&self) -> usize {
        self.rules.len()
    }
371
    /// Returns the total number of patterns across all rules.
    #[inline]
    pub(crate) fn num_patterns(&self) -> usize {
        self.num_patterns
    }
376
377    /// Returns the Aho-Corasick automaton that allows to search for pattern
378    /// atoms.
379    #[inline]
380    pub(crate) fn ac_automaton(&self) -> &AhoCorasick {
381        self.ac.as_ref().expect("Aho-Corasick automaton not compiled")
382    }
383
384    pub(crate) fn build_ac_automaton(&mut self) {
385        if self.ac.is_some() {
386            return;
387        }
388
389        #[cfg(feature = "logging")]
390        let start = Instant::now();
391
392        #[cfg(feature = "logging")]
393        let mut num_atoms = [0_usize; 6];
394
395        let atoms = self.atoms.iter().map(|x| {
396            #[cfg(feature = "logging")]
397            {
398                match x.atom.len() {
399                    atom_len @ 0..=4 => num_atoms[atom_len] += 1,
400                    _ => num_atoms[num_atoms.len() - 1] += 1,
401                }
402
403                if x.atom.len() < 2 {
404                    let (rule_id, pattern_ident_id) = self
405                        .get_rule_and_pattern_by_sub_pattern_id(
406                            x.sub_pattern_id,
407                        )
408                        .unwrap();
409
410                    let rule = self.get(rule_id);
411
412                    info!(
413                            "Very short atom in pattern `{}` in rule `{}:{}` (length: {})",
414                            self.ident_pool.get(pattern_ident_id).unwrap(),
415                            self.ident_pool
416                                .get(rule.namespace_ident_id)
417                                .unwrap(),
418                            self.ident_pool.get(rule.ident_id).unwrap(),
419                            x.atom.len()
420                        );
421                }
422            }
423
424            x.atom.as_ref()
425        });
426
427        self.ac = Some(
428            AhoCorasick::new(atoms)
429                .expect("failed to build Aho-Corasick automaton"),
430        );
431
432        #[cfg(feature = "logging")]
433        {
434            info!(
435                "Aho-Corasick automaton build time: {:?}",
436                Instant::elapsed(&start)
437            );
438
439            info!("Number of rules: {}", self.num_rules());
440            info!("Number of patterns: {}", self.num_patterns());
441            info!(
442                "Number of anchored sub-patterns: {}",
443                self.anchored_sub_patterns.len()
444            );
445            info!("Number of atoms: {}", self.atoms.len());
446            info!("Atoms with len = 0: {}", num_atoms[0]);
447            info!("Atoms with len = 1: {}", num_atoms[1]);
448            info!("Atoms with len = 2: {}", num_atoms[2]);
449            info!("Atoms with len = 3: {}", num_atoms[3]);
450            info!("Atoms with len = 4: {}", num_atoms[4]);
451            info!("Atoms with len > 4: {}", num_atoms[5]);
452        }
453    }
454
    /// Returns the pool with the literal strings used in the rules.
    #[inline]
    pub(crate) fn lit_pool(&self) -> &BStringPool<LiteralId> {
        &self.lit_pool
    }
459
    /// Returns the pool with the identifiers used in the rules.
    #[inline]
    pub(crate) fn ident_pool(&self) -> &StringPool<IdentId> {
        &self.ident_pool
    }
464
465    #[inline]
466    pub(crate) fn globals(&self) -> types::Struct {
467        let (globals, _): (types::Struct, usize) =
468            bincode::serde::decode_from_slice(
469                self.serialized_globals.as_slice(),
470                bincode::config::standard(),
471            )
472            .expect("error deserializing global variables");
473        globals
474    }
475
476    #[inline]
477    pub(crate) fn wasm_mod(&self) -> &wasmtime::Module {
478        self.compiled_wasm_mod.as_ref().unwrap()
479    }
480
    /// Returns the [`FilesizeBounds`] associated to the given pattern, or
    /// `None` if the `filesize_bounds` map has no entry for it.
    #[inline]
    pub(crate) fn filesize_bounds(
        &self,
        pattern_id: PatternId,
    ) -> Option<&FilesizeBounds> {
        self.filesize_bounds.get(&pattern_id)
    }
488}
489
/// Serializes the compiled WASM module as an optional sequence of bytes.
///
/// This variant is used when the `native-code-serialization` feature is
/// enabled. A `None` module is serialized as `None`; otherwise the bytes
/// returned by `wasmtime::Module::serialize` are emitted.
///
/// # Errors
///
/// Fails with a custom serializer error if the module can't be serialized.
#[cfg(feature = "native-code-serialization")]
fn serialize_wasm_mod<S>(
    wasm_mod: &Option<wasmtime::Module>,
    serializer: S,
) -> Result<S::Ok, S::Error>
where
    S: Serializer,
{
    match wasm_mod {
        None => serializer.serialize_none(),
        Some(module) => {
            let native_code = module
                .serialize()
                .map_err(|err| serde::ser::Error::custom(err.to_string()))?;

            serializer.serialize_some(native_code.as_slice())
        }
    }
}
508
/// Serializes the compiled WASM module when the
/// `native-code-serialization` feature is disabled.
///
/// The module is ignored and `None` is always emitted, so the serialized
/// rules never contain native code in this configuration.
#[cfg(not(feature = "native-code-serialization"))]
fn serialize_wasm_mod<S>(
    _wasm_mod: &Option<wasmtime::Module>,
    serializer: S,
) -> Result<S::Ok, S::Error>
where
    S: Serializer,
{
    serializer.serialize_none()
}
519
520pub fn deserialize_wasm_mod<'de, D>(
521    deserializer: D,
522) -> Result<Option<wasmtime::Module>, D::Error>
523where
524    D: Deserializer<'de>,
525{
526    let bytes: Option<&[u8]> = Deserialize::deserialize(deserializer)?;
527    let module = if let Some(bytes) = bytes {
528        unsafe {
529            wasmtime::Module::deserialize(wasm::get_engine(), bytes).ok()
530        }
531    } else {
532        None
533    };
534
535    Ok(module)
536}
537
/// Iterator that yields the compiled rules.
///
/// Returned by [`Rules::iter`].
pub struct RulesIter<'a> {
    /// The rules being iterated, handed to each yielded [`Rule`].
    rules: &'a Rules,
    /// Underlying iterator over the rules' [`RuleInfo`] entries.
    iterator: Iter<'a, RuleInfo>,
}
543
544impl<'a> Iterator for RulesIter<'a> {
545    type Item = Rule<'a, 'a>;
546
547    fn next(&mut self) -> Option<Self::Item> {
548        Some(Rule {
549            ctx: None,
550            rules: self.rules,
551            rule_info: self.iterator.next()?,
552        })
553    }
554}
555
556impl ExactSizeIterator for RulesIter<'_> {
557    #[inline]
558    fn len(&self) -> usize {
559        self.iterator.len()
560    }
561}
562
563impl fmt::Debug for Rules {
564    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
565        for (id, rule) in self.rules.iter().enumerate() {
566            let name = self.ident_pool.get(rule.ident_id).unwrap();
567            let namespace =
568                self.ident_pool.get(rule.namespace_ident_id).unwrap();
569            writeln!(f, "RuleId({id})")?;
570            writeln!(f, "  namespace: {namespace}")?;
571            writeln!(f, "  name: {name}")?;
572            writeln!(f, "  patterns:")?;
573            for pattern in &rule.patterns {
574                let ident = self.ident_pool.get(pattern.ident_id).unwrap();
575                writeln!(f, "    {:?} {ident} ", pattern.pattern_id)?;
576            }
577        }
578
579        for (id, (pattern_id, _)) in self.sub_patterns.iter().enumerate() {
580            writeln!(f, "SubPatternId({id}) -> {pattern_id:?}")?;
581        }
582
583        Ok(())
584    }
585}
586
/// Metadata values.
///
/// Each variant holds the value of one metadata entry in a rule (see
/// [`RuleInfo::metadata`]).
#[derive(Serialize, Deserialize)]
pub(crate) enum MetaValue {
    /// A boolean value.
    Bool(bool),
    /// An integer value.
    Integer(i64),
    /// A floating-point value.
    Float(f64),
    /// A string value, referenced by its [`LiteralId`] (presumably an entry
    /// in the literals pool -- confirm against the compiler).
    String(LiteralId),
    /// A bytes value, referenced by its [`LiteralId`] (presumably an entry
    /// in the literals pool -- confirm against the compiler).
    Bytes(LiteralId),
}
596
/// Information about each of the individual rules included in [`Rules`].
#[derive(Serialize, Deserialize)]
pub(crate) struct RuleInfo {
    /// The ID of the namespace the rule belongs to.
    pub namespace_id: NamespaceId,
    /// The ID of the rule namespace in the identifiers pool.
    pub namespace_ident_id: IdentId,
    /// The ID of the rule identifier in the identifiers pool.
    pub ident_id: IdentId,
    /// Tags associated to the rule.
    pub tags: Vec<IdentId>,
    /// Reference to the rule identifier in the source code. This field is
    /// ignored while serializing and deserializing compiled rules, as it
    /// is used only during the compilation phase, but not during the scan
    /// phase.
    #[serde(skip)]
    pub ident_ref: CodeLoc,
    /// Metadata associated to the rule. Each entry pairs the [`IdentId`] of
    /// the metadata identifier with its value.
    pub metadata: Vec<(IdentId, MetaValue)>,
    /// Vector with all the patterns defined by this rule. Whether a pattern
    /// is private is indicated by [`PatternInfo::is_private`].
    pub patterns: Vec<PatternInfo>,
    /// Number of private patterns in the rule. The number of non-private
    /// patterns can be computed as `patterns.len() - num_private_patterns`.
    pub num_private_patterns: usize,
    /// True if the rule is global.
    pub is_global: bool,
    /// True if the rule is private.
    pub is_private: bool,
}
627
/// Information about each pattern in a rule.
#[derive(Serialize, Deserialize)]
pub(crate) struct PatternInfo {
    /// Unique ID for this pattern.
    pub pattern_id: PatternId,
    /// The ID of the pattern identifier in the identifiers pool.
    pub ident_id: IdentId,
    /// Indicates if the pattern is text, hex or regexp.
    pub kind: PatternKind,
    /// True if the pattern is private.
    pub is_private: bool,
}
640
/// Describes the bounds for `filesize` imposed by a rule condition.
///
/// For example, the condition `filesize < 1000 and $a` only matches files
/// smaller than 1000 bytes. That would be represented by:
///
/// ```text
/// FilesizeBounds { start: Bound::Unbounded, end: Bound::Excluded(1000) }
/// ```
///
/// In contrast, the condition `filesize < 1000 or $a` does not impose any
/// bounds on `filesize`, since the use of `or` allows files of 1000 bytes
/// or larger to also match. This case is represented by:
///
/// ```text
/// FilesizeBounds { start: Bound::Unbounded, end: Bound::Unbounded }
/// ```
#[derive(Debug, PartialEq, Serialize, Deserialize, Clone, Hash, Eq)]
pub(crate) struct FilesizeBounds {
    /// Lower bound for `filesize`.
    start: Bound<i64>,
    /// Upper bound for `filesize`.
    end: Bound<i64>,
}
662
663impl Default for FilesizeBounds {
664    fn default() -> Self {
665        Self { start: Bound::Unbounded, end: Bound::Unbounded }
666    }
667}
668
669impl<T: RangeBounds<i64>> From<T> for FilesizeBounds {
670    fn from(value: T) -> Self {
671        Self {
672            start: value.start_bound().cloned(),
673            end: value.end_bound().cloned(),
674        }
675    }
676}
677
678impl FilesizeBounds {
679    pub fn unbounded(&self) -> bool {
680        matches!(self.start, Bound::Unbounded)
681            && matches!(self.end, Bound::Unbounded)
682    }
683
684    pub fn contains(&self, value: i64) -> bool {
685        let start_ok = match self.start {
686            Bound::Included(start) => value >= start,
687            Bound::Excluded(start) => value > start,
688            Bound::Unbounded => true,
689        };
690
691        let end_ok = match self.end {
692            Bound::Included(end) => value <= end,
693            Bound::Excluded(end) => value < end,
694            Bound::Unbounded => true,
695        };
696
697        start_ok && end_ok
698    }
699    pub fn max_start(&mut self, bound: Bound<i64>) -> &mut Self {
700        match (&self.start, &bound) {
701            (Bound::Included(current), Bound::Included(new)) => {
702                if new > current {
703                    self.start = Bound::Included(*new);
704                }
705            }
706            (Bound::Included(current), Bound::Excluded(new)) => {
707                if new >= current {
708                    self.start = Bound::Excluded(*new);
709                }
710            }
711            (Bound::Excluded(current), Bound::Included(new)) => {
712                if new > current {
713                    self.start = Bound::Included(*new);
714                }
715            }
716            (Bound::Excluded(current), Bound::Excluded(new)) => {
717                if new > current {
718                    self.start = Bound::Excluded(*new);
719                }
720            }
721            (Bound::Unbounded, new) => {
722                self.start = *new;
723            }
724            (_, Bound::Unbounded) => {}
725        }
726        self
727    }
728
729    pub fn min_end(&mut self, bound: Bound<i64>) -> &mut Self {
730        match (&self.end, &bound) {
731            (Bound::Included(current), Bound::Included(new)) => {
732                if new < current {
733                    self.end = Bound::Included(*new);
734                }
735            }
736            (Bound::Included(current), Bound::Excluded(new)) => {
737                if new <= current {
738                    self.end = Bound::Excluded(*new);
739                }
740            }
741            (Bound::Excluded(current), Bound::Included(new)) => {
742                if new < current {
743                    self.end = Bound::Included(*new);
744                }
745            }
746            (Bound::Excluded(current), Bound::Excluded(new)) => {
747                if new < current {
748                    self.end = Bound::Excluded(*new)
749                }
750            }
751            (Bound::Unbounded, new) => {
752                self.end = *new;
753            }
754            (_, Bound::Unbounded) => {}
755        }
756        self
757    }
758}
759
/// Represents an atom extracted from a pattern and added to the Aho-Corasick
/// automaton.
///
/// Each time the Aho-Corasick automaton finds one of these atoms, it proceeds
/// to verify whether the corresponding sub-pattern actually matches or not.
/// The verification process depends on the type of sub-pattern.
#[derive(Serialize, Deserialize)]
pub(crate) struct SubPatternAtom {
    /// The [`SubPatternId`] that identifies the sub-pattern this atom
    /// belongs to.
    sub_pattern_id: SubPatternId,
    /// The atom itself.
    atom: Atom,
    /// The index within `re_code` where the forward code for this atom
    /// starts, if any.
    fwd_code: Option<FwdCodeLoc>,
    /// The index within `re_code` where the backward code for this atom
    /// starts, if any.
    bck_code: Option<BckCodeLoc>,
}
778
779impl SubPatternAtom {
780    #[inline]
781    pub(crate) fn from_atom(sub_pattern_id: SubPatternId, atom: Atom) -> Self {
782        Self { sub_pattern_id, atom, bck_code: None, fwd_code: None }
783    }
784
785    pub(crate) fn from_regexp_atom(
786        sub_pattern_id: SubPatternId,
787        value: RegexpAtom,
788    ) -> Self {
789        Self {
790            sub_pattern_id,
791            atom: value.atom,
792            fwd_code: value.fwd_code,
793            bck_code: value.bck_code,
794        }
795    }
796
797    #[inline]
798    pub(crate) fn sub_pattern_id(&self) -> SubPatternId {
799        self.sub_pattern_id
800    }
801
802    #[cfg(feature = "exact-atoms")]
803    #[inline]
804    pub(crate) fn is_exact(&self) -> bool {
805        self.atom.is_exact()
806    }
807
808    #[inline]
809    pub(crate) fn len(&self) -> usize {
810        self.atom.len()
811    }
812
813    #[inline]
814    pub(crate) fn backtrack(&self) -> usize {
815        self.atom.backtrack() as usize
816    }
817
818    #[inline]
819    pub(crate) fn as_slice(&self) -> &[u8] {
820        self.atom.as_ref()
821    }
822
823    #[inline]
824    pub(crate) fn fwd_code(&self) -> Option<FwdCodeLoc> {
825        self.fwd_code
826    }
827
828    #[inline]
829    pub(crate) fn bck_code(&self) -> Option<BckCodeLoc> {
830        self.bck_code
831    }
832}