Skip to main content

rsigma_convert/
backend.rs

1use std::collections::HashMap;
2use std::sync::Mutex;
3
4use rsigma_eval::pipeline::state::PipelineState;
5use rsigma_parser::*;
6
7use crate::error::{ConvertError, Result};
8use crate::state::{ConversionState, ConvertResult};
9
10/// Process-wide cache for compiled regexes keyed by pattern string.
11static REGEX_CACHE: Mutex<Option<HashMap<&'static str, regex::Regex>>> = Mutex::new(None);
12
13fn get_cached_regex(pattern: &'static str) -> Option<regex::Regex> {
14    let mut guard = REGEX_CACHE.lock().unwrap();
15    let cache = guard.get_or_insert_with(HashMap::new);
16    if let Some(re) = cache.get(pattern) {
17        return Some(re.clone());
18    }
19    match regex::Regex::new(pattern) {
20        Ok(re) => {
21            cache.insert(pattern, re.clone());
22            Some(re)
23        }
24        Err(_) => None,
25    }
26}
27
28// =============================================================================
29// Token precedence
30// =============================================================================
31
/// Kind of boolean operator, used when deciding whether a nested
/// expression has to be wrapped in a group.
///
/// Discriminants are assigned in declaration order (`NOT` = 0, `AND` = 1,
/// `OR` = 2), which is also the order the derived `Ord` uses.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum TokenType {
    /// Highest precedence (binds tightest).
    NOT,
    /// Conjunction.
    AND,
    /// Disjunction.
    OR,
}
40
41// =============================================================================
42// Backend trait
43// =============================================================================
44
45/// Core conversion trait.
46///
47/// Backends implement this to convert parsed Sigma AST nodes into
48/// backend-native query strings. The trait operates on **parsed** types
49/// from `rsigma-parser` because conversion needs the original field names,
50/// modifiers, and values rather than compiled matchers.
51pub trait Backend: Send + Sync {
52    fn name(&self) -> &str;
53    fn formats(&self) -> &[(&str, &str)];
54
55    fn default_format(&self) -> &str {
56        "default"
57    }
58
59    fn requires_pipeline(&self) -> bool {
60        false
61    }
62
63    // --- Detection rule conversion ---
64
65    fn convert_rule(
66        &self,
67        rule: &SigmaRule,
68        output_format: &str,
69        pipeline_state: &PipelineState,
70    ) -> Result<Vec<String>>;
71
72    // --- Condition tree dispatch ---
73
74    fn convert_condition(
75        &self,
76        expr: &ConditionExpr,
77        detections: &HashMap<String, Detection>,
78        state: &mut ConversionState,
79    ) -> Result<String>;
80
81    fn convert_condition_and(&self, exprs: &[String]) -> Result<String>;
82    fn convert_condition_or(&self, exprs: &[String]) -> Result<String>;
83    fn convert_condition_not(&self, expr: &str) -> Result<String>;
84
85    // --- Detection item conversion ---
86
87    fn convert_detection(&self, det: &Detection, state: &mut ConversionState) -> Result<String>;
88
89    fn convert_detection_item(
90        &self,
91        item: &DetectionItem,
92        state: &mut ConversionState,
93    ) -> Result<String>;
94
95    // --- Field/value escaping ---
96
97    fn escape_and_quote_field(&self, field: &str) -> String;
98    fn convert_value_str(&self, value: &SigmaString, state: &ConversionState) -> String;
99    fn convert_value_re(&self, regex: &str, state: &ConversionState) -> String;
100
101    // --- Value-type-specific methods ---
102
103    fn convert_field_eq_str(
104        &self,
105        field: &str,
106        value: &SigmaString,
107        modifiers: &[Modifier],
108        state: &mut ConversionState,
109    ) -> Result<ConvertResult>;
110
111    fn convert_field_eq_str_case_sensitive(
112        &self,
113        field: &str,
114        value: &SigmaString,
115        modifiers: &[Modifier],
116        state: &mut ConversionState,
117    ) -> Result<ConvertResult>;
118
119    fn convert_field_eq_num(
120        &self,
121        field: &str,
122        value: f64,
123        state: &mut ConversionState,
124    ) -> Result<String>;
125
126    fn convert_field_eq_bool(
127        &self,
128        field: &str,
129        value: bool,
130        state: &mut ConversionState,
131    ) -> Result<String>;
132
133    fn convert_field_eq_null(&self, field: &str, state: &mut ConversionState) -> Result<String>;
134
135    fn convert_field_eq_re(
136        &self,
137        field: &str,
138        pattern: &str,
139        flags: &[Modifier],
140        state: &mut ConversionState,
141    ) -> Result<ConvertResult>;
142
143    fn convert_field_eq_cidr(
144        &self,
145        field: &str,
146        cidr: &str,
147        state: &mut ConversionState,
148    ) -> Result<ConvertResult>;
149
150    fn convert_field_compare(
151        &self,
152        field: &str,
153        op: &Modifier,
154        value: f64,
155        state: &mut ConversionState,
156    ) -> Result<String>;
157
158    fn convert_field_exists(
159        &self,
160        field: &str,
161        exists: bool,
162        state: &mut ConversionState,
163    ) -> Result<String>;
164
165    fn convert_field_eq_query_expr(
166        &self,
167        field: &str,
168        expr: &str,
169        id: &str,
170        state: &mut ConversionState,
171    ) -> Result<String>;
172
173    fn convert_field_ref(
174        &self,
175        field1: &str,
176        field2: &str,
177        state: &mut ConversionState,
178    ) -> Result<ConvertResult>;
179
180    fn convert_keyword(&self, value: &SigmaValue, state: &mut ConversionState) -> Result<String>;
181
182    // --- IN-list optimization (optional) ---
183
184    fn convert_condition_as_in_expression(
185        &self,
186        _field: &str,
187        _values: &[&SigmaValue],
188        _is_or: bool,
189        _state: &mut ConversionState,
190    ) -> Result<String> {
191        Err(ConvertError::UnsupportedModifier(
192            "IN expression not supported".into(),
193        ))
194    }
195
196    // --- Query finalization ---
197
198    fn finish_query(
199        &self,
200        rule: &SigmaRule,
201        query: String,
202        state: &ConversionState,
203    ) -> Result<String>;
204
205    fn finalize_query(
206        &self,
207        rule: &SigmaRule,
208        query: String,
209        index: usize,
210        state: &ConversionState,
211        output_format: &str,
212    ) -> Result<String>;
213
214    fn finalize_output(&self, queries: Vec<String>, output_format: &str) -> Result<String>;
215
216    // --- Correlation rule conversion (optional) ---
217
218    fn supports_correlation(&self) -> bool {
219        false
220    }
221
222    fn convert_correlation_rule(
223        &self,
224        _rule: &CorrelationRule,
225        _output_format: &str,
226        _pipeline_state: &PipelineState,
227    ) -> Result<Vec<String>> {
228        Err(ConvertError::UnsupportedCorrelation(
229            "correlation rules not supported by this backend".into(),
230        ))
231    }
232}
233
234// =============================================================================
235// TextQueryConfig
236// =============================================================================
237
238/// Configuration tokens for text-based query backends.
239///
240/// Mirrors pySigma's `TextQueryBackend` class variables. Backends create a
241/// `const` or `static` instance of this struct and delegate to the
242/// `text_convert_*` free functions for the default conversion logic.
243pub struct TextQueryConfig {
244    // --- Precedence and grouping ---
245    pub precedence: (TokenType, TokenType, TokenType),
246    pub group_expression: &'static str,
247    pub token_separator: &'static str,
248
249    // --- Boolean operators ---
250    pub and_token: &'static str,
251    pub or_token: &'static str,
252    pub not_token: &'static str,
253    pub eq_token: &'static str,
254
255    // --- Negation expressions ---
256    pub not_eq_token: Option<&'static str>,
257    pub eq_expression: Option<&'static str>,
258    pub not_eq_expression: Option<&'static str>,
259    pub convert_not_as_not_eq: bool,
260
261    // --- Wildcards ---
262    pub wildcard_multi: &'static str,
263    pub wildcard_single: &'static str,
264
265    // --- String quoting and escaping ---
266    pub str_quote: &'static str,
267    pub str_quote_pattern: Option<&'static str>,
268    pub str_quote_pattern_negation: bool,
269    pub escape_char: &'static str,
270    pub add_escaped: &'static [&'static str],
271    pub filter_chars: &'static [&'static str],
272
273    // --- Field name quoting and escaping ---
274    pub field_quote: Option<&'static str>,
275    pub field_quote_pattern: Option<&'static str>,
276    pub field_quote_pattern_negation: bool,
277    pub field_escape: Option<&'static str>,
278    pub field_escape_pattern: Option<&'static str>,
279
280    // --- String match expressions ---
281    pub startswith_expression: Option<&'static str>,
282    pub not_startswith_expression: Option<&'static str>,
283    pub startswith_expression_allow_special: bool,
284    pub endswith_expression: Option<&'static str>,
285    pub not_endswith_expression: Option<&'static str>,
286    pub endswith_expression_allow_special: bool,
287    pub contains_expression: Option<&'static str>,
288    pub not_contains_expression: Option<&'static str>,
289    pub contains_expression_allow_special: bool,
290    pub wildcard_match_expression: Option<&'static str>,
291
292    // --- Case-sensitive match expressions ---
293    pub case_sensitive_match_expression: Option<&'static str>,
294    pub case_sensitive_startswith_expression: Option<&'static str>,
295    pub case_sensitive_endswith_expression: Option<&'static str>,
296    pub case_sensitive_contains_expression: Option<&'static str>,
297
298    // --- Regex ---
299    pub re_expression: Option<&'static str>,
300    pub not_re_expression: Option<&'static str>,
301    pub re_escape_char: Option<&'static str>,
302    pub re_escape: &'static [&'static str],
303    pub re_escape_escape_char: Option<&'static str>,
304
305    // --- CIDR ---
306    pub cidr_expression: Option<&'static str>,
307    pub not_cidr_expression: Option<&'static str>,
308
309    // --- Null / field existence ---
310    pub field_null_expression: &'static str,
311    pub field_exists_expression: Option<&'static str>,
312    pub field_not_exists_expression: Option<&'static str>,
313
314    // --- Compare operators ---
315    pub compare_op_expression: Option<&'static str>,
316    pub compare_ops: &'static [(&'static str, &'static str)],
317
318    // --- IN-list optimization ---
319    pub convert_or_as_in: bool,
320    pub convert_and_as_in: bool,
321    pub in_expressions_allow_wildcards: bool,
322    pub field_in_list_expression: Option<&'static str>,
323    pub or_in_operator: Option<&'static str>,
324    pub and_in_operator: Option<&'static str>,
325    pub list_separator: &'static str,
326
327    // --- Unbound/keyword ---
328    pub unbound_value_str_expression: Option<&'static str>,
329    pub unbound_value_num_expression: Option<&'static str>,
330    pub unbound_value_re_expression: Option<&'static str>,
331
332    // --- Field-to-field comparison ---
333    pub field_eq_field_expression: Option<&'static str>,
334    pub field_eq_field_escaping_quoting: bool,
335
336    // --- Deferred query parts ---
337    pub deferred_start: Option<&'static str>,
338    pub deferred_separator: Option<&'static str>,
339    pub deferred_only_query: &'static str,
340
341    // --- Bool values ---
342    pub bool_true: &'static str,
343    pub bool_false: &'static str,
344
345    // --- Query envelope ---
346    pub query_expression: &'static str,
347    pub state_defaults: &'static [(&'static str, &'static str)],
348}
349
350impl TextQueryConfig {
351    /// Check if `inner` needs parenthesisation when nested inside `outer`.
352    pub fn needs_grouping(&self, outer: TokenType, inner: TokenType) -> bool {
353        let rank = |t: TokenType| -> u8 {
354            if t == self.precedence.0 {
355                0
356            } else if t == self.precedence.1 {
357                1
358            } else {
359                2
360            }
361        };
362        rank(inner) > rank(outer)
363    }
364}
365
366// =============================================================================
367// Text-backend free functions
368// =============================================================================
369
370/// Escape and optionally quote a field name according to the config.
371pub fn text_escape_and_quote_field(cfg: &TextQueryConfig, field: &str) -> String {
372    let mut escaped = field.to_string();
373
374    if let Some(esc) = cfg.field_escape
375        && let Some(pat) = cfg.field_escape_pattern
376        && let Some(re) = get_cached_regex(pat)
377    {
378        escaped = re
379            .replace_all(&escaped, |_: &regex::Captures| esc)
380            .to_string();
381    }
382
383    if let Some(quote) = cfg.field_quote {
384        let should_quote = match cfg.field_quote_pattern {
385            Some(pat) => {
386                let matches = get_cached_regex(pat)
387                    .map(|re| re.is_match(&escaped))
388                    .unwrap_or(false);
389                if cfg.field_quote_pattern_negation {
390                    !matches
391                } else {
392                    matches
393                }
394            }
395            None => true,
396        };
397        if should_quote {
398            return format!("{quote}{escaped}{quote}");
399        }
400    }
401
402    escaped
403}
404
405/// Convert a `SigmaString` to its text representation, applying escaping and quoting.
406pub fn text_convert_value_str(cfg: &TextQueryConfig, value: &SigmaString) -> String {
407    let mut result = String::new();
408    let mut has_wildcards = false;
409
410    for part in &value.parts {
411        match part {
412            StringPart::Plain(s) => {
413                let mut escaped = String::with_capacity(s.len());
414                for ch in s.chars() {
415                    let ch_str = ch.to_string();
416                    if cfg.filter_chars.contains(&ch_str.as_str()) {
417                        continue;
418                    }
419                    if ch_str == cfg.escape_char
420                        || ch_str == cfg.str_quote
421                        || cfg.add_escaped.contains(&ch_str.as_str())
422                    {
423                        escaped.push_str(cfg.escape_char);
424                    }
425                    escaped.push(ch);
426                }
427                result.push_str(&escaped);
428            }
429            StringPart::Special(SpecialChar::WildcardMulti) => {
430                result.push_str(cfg.wildcard_multi);
431                has_wildcards = true;
432            }
433            StringPart::Special(SpecialChar::WildcardSingle) => {
434                result.push_str(cfg.wildcard_single);
435                has_wildcards = true;
436            }
437        }
438    }
439
440    if !has_wildcards {
441        let should_quote = match cfg.str_quote_pattern {
442            Some(pat) => {
443                let matches = get_cached_regex(pat)
444                    .map(|re| re.is_match(&result))
445                    .unwrap_or(false);
446                if cfg.str_quote_pattern_negation {
447                    !matches
448                } else {
449                    matches
450                }
451            }
452            None => true,
453        };
454        if should_quote {
455            return format!("{}{result}{}", cfg.str_quote, cfg.str_quote);
456        }
457    }
458
459    result
460}
461
462/// Escape a regex pattern according to the config.
463pub fn text_convert_value_re(cfg: &TextQueryConfig, regex_str: &str) -> String {
464    let mut result = regex_str.to_string();
465
466    if let Some(esc_esc) = cfg.re_escape_escape_char
467        && let Some(esc) = cfg.re_escape_char
468    {
469        result = result.replace(esc, &format!("{esc_esc}{esc}"));
470    }
471
472    if let Some(esc) = cfg.re_escape_char {
473        for pattern in cfg.re_escape {
474            result = result.replace(pattern, &format!("{esc}{pattern}"));
475        }
476    }
477
478    result
479}
480
481/// Precedence-aware grouping.
482pub fn text_convert_condition_group(
483    cfg: &TextQueryConfig,
484    expr: &str,
485    outer: TokenType,
486    inner: TokenType,
487) -> String {
488    if cfg.needs_grouping(outer, inner) {
489        cfg.group_expression.replace("{expr}", expr)
490    } else {
491        expr.to_string()
492    }
493}
494
495/// Join expressions with the AND token.
496pub fn text_convert_condition_and(cfg: &TextQueryConfig, exprs: &[String]) -> String {
497    let sep = if cfg.and_token.is_empty() {
498        cfg.token_separator.to_string()
499    } else {
500        format!(
501            "{}{}{}",
502            cfg.token_separator, cfg.and_token, cfg.token_separator
503        )
504    };
505    exprs.join(&sep)
506}
507
508/// Join expressions with the OR token.
509pub fn text_convert_condition_or(cfg: &TextQueryConfig, exprs: &[String]) -> String {
510    let sep = format!(
511        "{}{}{}",
512        cfg.token_separator, cfg.or_token, cfg.token_separator
513    );
514    exprs.join(&sep)
515}
516
517/// Negate an expression with the NOT token.
518pub fn text_convert_condition_not(cfg: &TextQueryConfig, expr: &str) -> String {
519    format!("{}{}{expr}", cfg.not_token, cfg.token_separator)
520}
521
522/// Assemble the final query from the main condition string and any deferred parts.
523pub fn text_finish_query(
524    cfg: &TextQueryConfig,
525    query: &str,
526    state: &ConversionState,
527    rule: &SigmaRule,
528) -> String {
529    let main_query = if state.has_deferred() && query.is_empty() {
530        cfg.deferred_only_query
531    } else {
532        query
533    };
534
535    let mut result = cfg.query_expression.replace("{query}", main_query);
536
537    // Substitute state defaults first, then actual state values
538    for (key, default) in cfg.state_defaults {
539        let placeholder = format!("{{{key}}}");
540        result = result.replace(&placeholder, default);
541    }
542    for (key, val) in &state.processing_state {
543        if let Some(s) = val.as_str() {
544            let placeholder = format!("{{{key}}}");
545            result = result.replace(&placeholder, s);
546        }
547    }
548
549    // Substitute rule metadata
550    result = result.replace("{rule.title}", &rule.title);
551    if let Some(id) = &rule.id {
552        result = result.replace("{rule.id}", id);
553    }
554
555    // Append deferred parts
556    if state.has_deferred() {
557        let deferred_start = cfg.deferred_start.unwrap_or("");
558        let deferred_sep = cfg.deferred_separator.unwrap_or("");
559        let parts: Vec<String> = state.deferred.iter().map(|d| d.finalize()).collect();
560        result = format!("{result}{deferred_start}{}", parts.join(deferred_sep));
561    }
562
563    result
564}
565
566/// Dispatch string matching based on modifiers and wildcard positions.
567///
568/// Returns the query fragment for a field=value comparison, handling
569/// `contains`, `startswith`, `endswith`, and wildcard patterns.
570pub fn text_convert_field_eq_str(
571    cfg: &TextQueryConfig,
572    field: &str,
573    value: &SigmaString,
574    modifiers: &[Modifier],
575    _state: &ConversionState,
576) -> Result<ConvertResult> {
577    let escaped_field = text_escape_and_quote_field(cfg, field);
578    let is_cased = modifiers.contains(&Modifier::Cased);
579    let is_contains = modifiers.contains(&Modifier::Contains);
580    let is_startswith = modifiers.contains(&Modifier::StartsWith);
581    let is_endswith = modifiers.contains(&Modifier::EndsWith);
582
583    let value_str = text_convert_value_str(cfg, value);
584
585    // Case-sensitive dispatch
586    if is_cased {
587        if is_contains && let Some(expr) = cfg.case_sensitive_contains_expression {
588            return Ok(ConvertResult::Query(
589                expr.replace("{field}", &escaped_field)
590                    .replace("{value}", &value_str),
591            ));
592        }
593        if is_startswith && let Some(expr) = cfg.case_sensitive_startswith_expression {
594            return Ok(ConvertResult::Query(
595                expr.replace("{field}", &escaped_field)
596                    .replace("{value}", &value_str),
597            ));
598        }
599        if is_endswith && let Some(expr) = cfg.case_sensitive_endswith_expression {
600            return Ok(ConvertResult::Query(
601                expr.replace("{field}", &escaped_field)
602                    .replace("{value}", &value_str),
603            ));
604        }
605        if let Some(expr) = cfg.case_sensitive_match_expression {
606            return Ok(ConvertResult::Query(
607                expr.replace("{field}", &escaped_field)
608                    .replace("{value}", &value_str),
609            ));
610        }
611    }
612
613    // Case-insensitive dispatch (default)
614    if is_contains && let Some(expr) = cfg.contains_expression {
615        return Ok(ConvertResult::Query(
616            expr.replace("{field}", &escaped_field)
617                .replace("{value}", &value_str),
618        ));
619    }
620    if is_startswith && let Some(expr) = cfg.startswith_expression {
621        return Ok(ConvertResult::Query(
622            expr.replace("{field}", &escaped_field)
623                .replace("{value}", &value_str),
624        ));
625    }
626    if is_endswith && let Some(expr) = cfg.endswith_expression {
627        return Ok(ConvertResult::Query(
628            expr.replace("{field}", &escaped_field)
629                .replace("{value}", &value_str),
630        ));
631    }
632
633    // Wildcard match fallback
634    if value.contains_wildcards()
635        && let Some(expr) = cfg.wildcard_match_expression
636    {
637        return Ok(ConvertResult::Query(
638            expr.replace("{field}", &escaped_field)
639                .replace("{value}", &value_str),
640        ));
641    }
642
643    // Exact match (default)
644    let result = if let Some(expr) = cfg.eq_expression {
645        expr.replace("{field}", &escaped_field)
646            .replace("{value}", &value_str)
647    } else {
648        format!("{escaped_field}{}{value_str}", cfg.eq_token)
649    };
650    Ok(ConvertResult::Query(result))
651}