Skip to main content

rsigma_convert/
backend.rs

1use std::collections::HashMap;
2use std::sync::Mutex;
3
4use rsigma_eval::pipeline::state::PipelineState;
5use rsigma_parser::*;
6
7use crate::error::{ConvertError, Result};
8use crate::state::{ConversionState, ConvertResult};
9
10/// Process-wide cache for compiled regexes keyed by pattern string.
11static REGEX_CACHE: Mutex<Option<HashMap<&'static str, regex::Regex>>> = Mutex::new(None);
12
13fn get_cached_regex(pattern: &'static str) -> Option<regex::Regex> {
14    let mut guard = REGEX_CACHE.lock().unwrap();
15    let cache = guard.get_or_insert_with(HashMap::new);
16    if let Some(re) = cache.get(pattern) {
17        return Some(re.clone());
18    }
19    match regex::Regex::new(pattern) {
20        Ok(re) => {
21            cache.insert(pattern, re.clone());
22            Some(re)
23        }
24        Err(_) => None,
25    }
26}
27
28// =============================================================================
29// Token precedence
30// =============================================================================
31
/// Kind of boolean operator, used by the precedence-aware grouping logic.
///
/// The explicit discriminants also define the derived ordering
/// (`NOT < AND < OR`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum TokenType {
    /// Highest precedence (binds tightest).
    NOT = 0,
    /// Conjunction.
    AND = 1,
    /// Disjunction.
    OR = 2,
}
40
41// =============================================================================
42// Backend trait
43// =============================================================================
44
45/// Core conversion trait.
46///
47/// Backends implement this to convert parsed Sigma AST nodes into
48/// backend-native query strings. The trait operates on **parsed** types
49/// from `rsigma-parser` because conversion needs the original field names,
50/// modifiers, and values rather than compiled matchers.
51pub trait Backend: Send + Sync {
52    fn name(&self) -> &str;
53    fn formats(&self) -> &[(&str, &str)];
54
55    fn default_format(&self) -> &str {
56        "default"
57    }
58
59    fn requires_pipeline(&self) -> bool {
60        false
61    }
62
63    // --- Detection rule conversion ---
64
65    fn convert_rule(
66        &self,
67        rule: &SigmaRule,
68        output_format: &str,
69        pipeline_state: &PipelineState,
70    ) -> Result<Vec<String>>;
71
72    // --- Condition tree dispatch ---
73
74    fn convert_condition(
75        &self,
76        expr: &ConditionExpr,
77        detections: &HashMap<String, Detection>,
78        state: &mut ConversionState,
79    ) -> Result<String>;
80
81    fn convert_condition_and(&self, exprs: &[String]) -> Result<String>;
82    fn convert_condition_or(&self, exprs: &[String]) -> Result<String>;
83    fn convert_condition_not(&self, expr: &str) -> Result<String>;
84
85    // --- Detection item conversion ---
86
87    fn convert_detection(&self, det: &Detection, state: &mut ConversionState) -> Result<String>;
88
89    fn convert_detection_item(
90        &self,
91        item: &DetectionItem,
92        state: &mut ConversionState,
93    ) -> Result<String>;
94
95    /// Convert an array object-scope match (`field[any]:` / `field[all]:`).
96    ///
97    /// `body` is evaluated against the members of the array at `field`. The
98    /// default implementation reports the construct as unsupported; backends
99    /// that can express member quantification (e.g. PostgreSQL JSONB via
100    /// `jsonb_array_elements` + `EXISTS`) override this. Backends must fail
101    /// loudly here rather than emit a query with different semantics.
102    fn convert_array_match(
103        &self,
104        field: &str,
105        quantifier: ArrayQuantifier,
106        body: &Detection,
107        state: &mut ConversionState,
108    ) -> Result<String> {
109        let _ = (field, quantifier, body, state);
110        Err(ConvertError::UnsupportedArrayMatching)
111    }
112
113    /// Whether this backend can lower a positional array index (`field[N]`) in
114    /// a field path. Backends that cannot must not silently emit a literal
115    /// field reference (which would diverge from the evaluator's element-`N`
116    /// semantics); the default item conversion rejects indexed fields with
117    /// `UnsupportedArrayMatching`. PostgreSQL overrides this for JSONB mode.
118    fn supports_field_index(&self) -> bool {
119        false
120    }
121
122    // --- Field/value escaping ---
123
124    fn escape_and_quote_field(&self, field: &str) -> String;
125    fn convert_value_str(&self, value: &SigmaString, state: &ConversionState) -> String;
126    fn convert_value_re(&self, regex: &str, state: &ConversionState) -> String;
127
128    // --- Value-type-specific methods ---
129
130    fn convert_field_eq_str(
131        &self,
132        field: &str,
133        value: &SigmaString,
134        modifiers: &[Modifier],
135        state: &mut ConversionState,
136    ) -> Result<ConvertResult>;
137
138    fn convert_field_eq_str_case_sensitive(
139        &self,
140        field: &str,
141        value: &SigmaString,
142        modifiers: &[Modifier],
143        state: &mut ConversionState,
144    ) -> Result<ConvertResult>;
145
146    fn convert_field_eq_num(
147        &self,
148        field: &str,
149        value: f64,
150        state: &mut ConversionState,
151    ) -> Result<String>;
152
153    fn convert_field_eq_bool(
154        &self,
155        field: &str,
156        value: bool,
157        state: &mut ConversionState,
158    ) -> Result<String>;
159
160    fn convert_field_eq_null(&self, field: &str, state: &mut ConversionState) -> Result<String>;
161
162    fn convert_field_eq_re(
163        &self,
164        field: &str,
165        pattern: &str,
166        flags: &[Modifier],
167        state: &mut ConversionState,
168    ) -> Result<ConvertResult>;
169
170    fn convert_field_eq_cidr(
171        &self,
172        field: &str,
173        cidr: &str,
174        state: &mut ConversionState,
175    ) -> Result<ConvertResult>;
176
177    fn convert_field_compare(
178        &self,
179        field: &str,
180        op: &Modifier,
181        value: f64,
182        state: &mut ConversionState,
183    ) -> Result<String>;
184
185    fn convert_field_exists(
186        &self,
187        field: &str,
188        exists: bool,
189        state: &mut ConversionState,
190    ) -> Result<String>;
191
192    fn convert_field_eq_query_expr(
193        &self,
194        field: &str,
195        expr: &str,
196        id: &str,
197        state: &mut ConversionState,
198    ) -> Result<String>;
199
200    fn convert_field_ref(
201        &self,
202        field1: &str,
203        field2: &str,
204        state: &mut ConversionState,
205    ) -> Result<ConvertResult>;
206
207    fn convert_keyword(&self, value: &SigmaValue, state: &mut ConversionState) -> Result<String>;
208
209    // --- IN-list optimization (optional) ---
210
211    fn convert_condition_as_in_expression(
212        &self,
213        _field: &str,
214        _values: &[&SigmaValue],
215        _is_or: bool,
216        _state: &mut ConversionState,
217    ) -> Result<String> {
218        Err(ConvertError::UnsupportedModifier(
219            "IN expression not supported".into(),
220        ))
221    }
222
223    // --- Query finalization ---
224
225    fn finish_query(
226        &self,
227        rule: &SigmaRule,
228        query: String,
229        state: &ConversionState,
230    ) -> Result<String>;
231
232    fn finalize_query(
233        &self,
234        rule: &SigmaRule,
235        query: String,
236        index: usize,
237        state: &ConversionState,
238        output_format: &str,
239    ) -> Result<String>;
240
241    fn finalize_output(&self, queries: Vec<String>, output_format: &str) -> Result<String>;
242
243    // --- Correlation rule conversion (optional) ---
244
245    fn supports_correlation(&self) -> bool {
246        false
247    }
248
249    /// Correlation generation methods this backend offers, as
250    /// `(name, description)` pairs, mirroring pySigma's `correlation_methods`.
251    ///
252    /// The converting user selects one with the `correlation_method` backend
253    /// option, which overrides a rule's own `window` hint for that conversion.
254    /// An empty slice (the default) means the backend exposes no per-conversion
255    /// choice.
256    fn correlation_methods(&self) -> &[(&str, &str)] {
257        &[]
258    }
259
260    /// The correlation method used when the converting user selects none.
261    fn default_correlation_method(&self) -> &str {
262        "default"
263    }
264
265    /// Convert a correlation rule, discarding any non-fatal warnings.
266    ///
267    /// Convenience wrapper over [`convert_correlation_rule_with_warnings`]; the
268    /// `convert_collection` entry point uses the warnings-aware form so it can
269    /// surface diagnostics. Backends should override the warnings-aware method,
270    /// not this one.
271    ///
272    /// [`convert_correlation_rule_with_warnings`]: Backend::convert_correlation_rule_with_warnings
273    fn convert_correlation_rule(
274        &self,
275        rule: &CorrelationRule,
276        output_format: &str,
277        pipeline_state: &PipelineState,
278    ) -> Result<Vec<String>> {
279        let mut warnings = Vec::new();
280        self.convert_correlation_rule_with_warnings(
281            rule,
282            output_format,
283            pipeline_state,
284            &mut warnings,
285        )
286    }
287
288    /// Convert a correlation rule, appending any non-fatal diagnostics to
289    /// `warnings`.
290    ///
291    /// A backend pushes a warning when it can only approximate a requested
292    /// feature but still emits a usable query (the Sigma "should warn" case),
293    /// and returns [`ConvertError`] when a feature cannot be represented at all
294    /// (the "must error" case).
295    fn convert_correlation_rule_with_warnings(
296        &self,
297        _rule: &CorrelationRule,
298        _output_format: &str,
299        _pipeline_state: &PipelineState,
300        _warnings: &mut Vec<String>,
301    ) -> Result<Vec<String>> {
302        Err(ConvertError::UnsupportedCorrelation(
303            "correlation rules not supported by this backend".into(),
304        ))
305    }
306}
307
308// =============================================================================
309// TextQueryConfig
310// =============================================================================
311
312/// Configuration tokens for text-based query backends.
313///
314/// Mirrors pySigma's `TextQueryBackend` class variables. Backends create a
315/// `const` or `static` instance of this struct and delegate to the
316/// `text_convert_*` free functions for the default conversion logic.
317pub struct TextQueryConfig {
318    // --- Precedence and grouping ---
319    pub precedence: (TokenType, TokenType, TokenType),
320    pub group_expression: &'static str,
321    pub token_separator: &'static str,
322
323    // --- Boolean operators ---
324    pub and_token: &'static str,
325    pub or_token: &'static str,
326    pub not_token: &'static str,
327    pub eq_token: &'static str,
328
329    // --- Negation expressions ---
330    pub not_eq_token: Option<&'static str>,
331    pub eq_expression: Option<&'static str>,
332    pub not_eq_expression: Option<&'static str>,
333    pub convert_not_as_not_eq: bool,
334
335    // --- Wildcards ---
336    pub wildcard_multi: &'static str,
337    pub wildcard_single: &'static str,
338
339    // --- String quoting and escaping ---
340    pub str_quote: &'static str,
341    pub str_quote_pattern: Option<&'static str>,
342    pub str_quote_pattern_negation: bool,
343    pub escape_char: &'static str,
344    pub add_escaped: &'static [&'static str],
345    pub filter_chars: &'static [&'static str],
346
347    // --- Field name quoting and escaping ---
348    pub field_quote: Option<&'static str>,
349    pub field_quote_pattern: Option<&'static str>,
350    pub field_quote_pattern_negation: bool,
351    pub field_escape: Option<&'static str>,
352    pub field_escape_pattern: Option<&'static str>,
353
354    // --- String match expressions ---
355    pub startswith_expression: Option<&'static str>,
356    pub not_startswith_expression: Option<&'static str>,
357    pub startswith_expression_allow_special: bool,
358    pub endswith_expression: Option<&'static str>,
359    pub not_endswith_expression: Option<&'static str>,
360    pub endswith_expression_allow_special: bool,
361    pub contains_expression: Option<&'static str>,
362    pub not_contains_expression: Option<&'static str>,
363    pub contains_expression_allow_special: bool,
364    pub wildcard_match_expression: Option<&'static str>,
365
366    // --- Case-sensitive match expressions ---
367    pub case_sensitive_match_expression: Option<&'static str>,
368    pub case_sensitive_startswith_expression: Option<&'static str>,
369    pub case_sensitive_endswith_expression: Option<&'static str>,
370    pub case_sensitive_contains_expression: Option<&'static str>,
371
372    // --- Regex ---
373    pub re_expression: Option<&'static str>,
374    pub not_re_expression: Option<&'static str>,
375    pub re_escape_char: Option<&'static str>,
376    pub re_escape: &'static [&'static str],
377    pub re_escape_escape_char: Option<&'static str>,
378
379    // --- CIDR ---
380    pub cidr_expression: Option<&'static str>,
381    pub not_cidr_expression: Option<&'static str>,
382
383    // --- Null / field existence ---
384    pub field_null_expression: &'static str,
385    pub field_exists_expression: Option<&'static str>,
386    pub field_not_exists_expression: Option<&'static str>,
387
388    // --- Compare operators ---
389    pub compare_op_expression: Option<&'static str>,
390    pub compare_ops: &'static [(&'static str, &'static str)],
391
392    // --- IN-list optimization ---
393    pub convert_or_as_in: bool,
394    pub convert_and_as_in: bool,
395    pub in_expressions_allow_wildcards: bool,
396    pub field_in_list_expression: Option<&'static str>,
397    pub or_in_operator: Option<&'static str>,
398    pub and_in_operator: Option<&'static str>,
399    pub list_separator: &'static str,
400
401    // --- Unbound/keyword ---
402    pub unbound_value_str_expression: Option<&'static str>,
403    pub unbound_value_num_expression: Option<&'static str>,
404    pub unbound_value_re_expression: Option<&'static str>,
405
406    // --- Field-to-field comparison ---
407    pub field_eq_field_expression: Option<&'static str>,
408    pub field_eq_field_escaping_quoting: bool,
409
410    // --- Deferred query parts ---
411    pub deferred_start: Option<&'static str>,
412    pub deferred_separator: Option<&'static str>,
413    pub deferred_only_query: &'static str,
414
415    // --- Bool values ---
416    pub bool_true: &'static str,
417    pub bool_false: &'static str,
418
419    // --- Query envelope ---
420    pub query_expression: &'static str,
421    pub state_defaults: &'static [(&'static str, &'static str)],
422}
423
424impl TextQueryConfig {
425    /// Check if `inner` needs parenthesisation when nested inside `outer`.
426    pub fn needs_grouping(&self, outer: TokenType, inner: TokenType) -> bool {
427        let rank = |t: TokenType| -> u8 {
428            if t == self.precedence.0 {
429                0
430            } else if t == self.precedence.1 {
431                1
432            } else {
433                2
434            }
435        };
436        rank(inner) > rank(outer)
437    }
438}
439
440// =============================================================================
441// Text-backend free functions
442// =============================================================================
443
444/// Escape and optionally quote a field name according to the config.
445pub fn text_escape_and_quote_field(cfg: &TextQueryConfig, field: &str) -> String {
446    let mut escaped = field.to_string();
447
448    if let Some(esc) = cfg.field_escape
449        && let Some(pat) = cfg.field_escape_pattern
450        && let Some(re) = get_cached_regex(pat)
451    {
452        escaped = re
453            .replace_all(&escaped, |_: &regex::Captures| esc)
454            .to_string();
455    }
456
457    if let Some(quote) = cfg.field_quote {
458        let should_quote = match cfg.field_quote_pattern {
459            Some(pat) => {
460                let matches = get_cached_regex(pat)
461                    .map(|re| re.is_match(&escaped))
462                    .unwrap_or(false);
463                if cfg.field_quote_pattern_negation {
464                    !matches
465                } else {
466                    matches
467                }
468            }
469            None => true,
470        };
471        if should_quote {
472            return format!("{quote}{escaped}{quote}");
473        }
474    }
475
476    escaped
477}
478
479/// Convert a `SigmaString` to its text representation, applying escaping and quoting.
480pub fn text_convert_value_str(cfg: &TextQueryConfig, value: &SigmaString) -> String {
481    let mut result = String::new();
482    let mut has_wildcards = false;
483
484    for part in &value.parts {
485        match part {
486            StringPart::Plain(s) => {
487                let mut escaped = String::with_capacity(s.len());
488                for ch in s.chars() {
489                    let ch_str = ch.to_string();
490                    if cfg.filter_chars.contains(&ch_str.as_str()) {
491                        continue;
492                    }
493                    if ch_str == cfg.escape_char
494                        || ch_str == cfg.str_quote
495                        || cfg.add_escaped.contains(&ch_str.as_str())
496                    {
497                        escaped.push_str(cfg.escape_char);
498                    }
499                    escaped.push(ch);
500                }
501                result.push_str(&escaped);
502            }
503            StringPart::Special(SpecialChar::WildcardMulti) => {
504                result.push_str(cfg.wildcard_multi);
505                has_wildcards = true;
506            }
507            StringPart::Special(SpecialChar::WildcardSingle) => {
508                result.push_str(cfg.wildcard_single);
509                has_wildcards = true;
510            }
511        }
512    }
513
514    if !has_wildcards {
515        let should_quote = match cfg.str_quote_pattern {
516            Some(pat) => {
517                let matches = get_cached_regex(pat)
518                    .map(|re| re.is_match(&result))
519                    .unwrap_or(false);
520                if cfg.str_quote_pattern_negation {
521                    !matches
522                } else {
523                    matches
524                }
525            }
526            None => true,
527        };
528        if should_quote {
529            return format!("{}{result}{}", cfg.str_quote, cfg.str_quote);
530        }
531    }
532
533    result
534}
535
536/// Escape a regex pattern according to the config.
537pub fn text_convert_value_re(cfg: &TextQueryConfig, regex_str: &str) -> String {
538    let mut result = regex_str.to_string();
539
540    if let Some(esc_esc) = cfg.re_escape_escape_char
541        && let Some(esc) = cfg.re_escape_char
542    {
543        result = result.replace(esc, &format!("{esc_esc}{esc}"));
544    }
545
546    if let Some(esc) = cfg.re_escape_char {
547        for pattern in cfg.re_escape {
548            result = result.replace(pattern, &format!("{esc}{pattern}"));
549        }
550    }
551
552    result
553}
554
555/// Precedence-aware grouping.
556pub fn text_convert_condition_group(
557    cfg: &TextQueryConfig,
558    expr: &str,
559    outer: TokenType,
560    inner: TokenType,
561) -> String {
562    if cfg.needs_grouping(outer, inner) {
563        cfg.group_expression.replace("{expr}", expr)
564    } else {
565        expr.to_string()
566    }
567}
568
569/// Join expressions with the AND token.
570pub fn text_convert_condition_and(cfg: &TextQueryConfig, exprs: &[String]) -> String {
571    let sep = if cfg.and_token.is_empty() {
572        cfg.token_separator.to_string()
573    } else {
574        format!(
575            "{}{}{}",
576            cfg.token_separator, cfg.and_token, cfg.token_separator
577        )
578    };
579    exprs.join(&sep)
580}
581
582/// Join expressions with the OR token.
583pub fn text_convert_condition_or(cfg: &TextQueryConfig, exprs: &[String]) -> String {
584    let sep = format!(
585        "{}{}{}",
586        cfg.token_separator, cfg.or_token, cfg.token_separator
587    );
588    exprs.join(&sep)
589}
590
591/// Negate an expression with the NOT token.
592pub fn text_convert_condition_not(cfg: &TextQueryConfig, expr: &str) -> String {
593    format!("{}{}{expr}", cfg.not_token, cfg.token_separator)
594}
595
596/// Assemble the final query from the main condition string and any deferred parts.
597pub fn text_finish_query(
598    cfg: &TextQueryConfig,
599    query: &str,
600    state: &ConversionState,
601    rule: &SigmaRule,
602) -> String {
603    let main_query = if state.has_deferred() && query.is_empty() {
604        cfg.deferred_only_query
605    } else {
606        query
607    };
608
609    let mut result = cfg.query_expression.replace("{query}", main_query);
610
611    // Substitute state defaults first, then actual state values
612    for (key, default) in cfg.state_defaults {
613        let placeholder = format!("{{{key}}}");
614        result = result.replace(&placeholder, default);
615    }
616    for (key, val) in &state.processing_state {
617        if let Some(s) = val.as_str() {
618            let placeholder = format!("{{{key}}}");
619            result = result.replace(&placeholder, s);
620        }
621    }
622
623    // Substitute rule metadata
624    result = result.replace("{rule.title}", &rule.title);
625    if let Some(id) = &rule.id {
626        result = result.replace("{rule.id}", id);
627    }
628
629    // Append deferred parts
630    if state.has_deferred() {
631        let deferred_start = cfg.deferred_start.unwrap_or("");
632        let deferred_sep = cfg.deferred_separator.unwrap_or("");
633        let parts: Vec<String> = state.deferred.iter().map(|d| d.finalize()).collect();
634        result = format!("{result}{deferred_start}{}", parts.join(deferred_sep));
635    }
636
637    result
638}
639
640/// Dispatch string matching based on modifiers and wildcard positions.
641///
642/// Returns the query fragment for a field=value comparison, handling
643/// `contains`, `startswith`, `endswith`, and wildcard patterns.
644pub fn text_convert_field_eq_str(
645    cfg: &TextQueryConfig,
646    field: &str,
647    value: &SigmaString,
648    modifiers: &[Modifier],
649    _state: &ConversionState,
650) -> Result<ConvertResult> {
651    let escaped_field = text_escape_and_quote_field(cfg, field);
652    let is_cased = modifiers.contains(&Modifier::Cased);
653    let is_contains = modifiers.contains(&Modifier::Contains);
654    let is_startswith = modifiers.contains(&Modifier::StartsWith);
655    let is_endswith = modifiers.contains(&Modifier::EndsWith);
656
657    let value_str = text_convert_value_str(cfg, value);
658
659    // Case-sensitive dispatch
660    if is_cased {
661        if is_contains && let Some(expr) = cfg.case_sensitive_contains_expression {
662            return Ok(ConvertResult::Query(
663                expr.replace("{field}", &escaped_field)
664                    .replace("{value}", &value_str),
665            ));
666        }
667        if is_startswith && let Some(expr) = cfg.case_sensitive_startswith_expression {
668            return Ok(ConvertResult::Query(
669                expr.replace("{field}", &escaped_field)
670                    .replace("{value}", &value_str),
671            ));
672        }
673        if is_endswith && let Some(expr) = cfg.case_sensitive_endswith_expression {
674            return Ok(ConvertResult::Query(
675                expr.replace("{field}", &escaped_field)
676                    .replace("{value}", &value_str),
677            ));
678        }
679        if let Some(expr) = cfg.case_sensitive_match_expression {
680            return Ok(ConvertResult::Query(
681                expr.replace("{field}", &escaped_field)
682                    .replace("{value}", &value_str),
683            ));
684        }
685    }
686
687    // Case-insensitive dispatch (default)
688    if is_contains && let Some(expr) = cfg.contains_expression {
689        return Ok(ConvertResult::Query(
690            expr.replace("{field}", &escaped_field)
691                .replace("{value}", &value_str),
692        ));
693    }
694    if is_startswith && let Some(expr) = cfg.startswith_expression {
695        return Ok(ConvertResult::Query(
696            expr.replace("{field}", &escaped_field)
697                .replace("{value}", &value_str),
698        ));
699    }
700    if is_endswith && let Some(expr) = cfg.endswith_expression {
701        return Ok(ConvertResult::Query(
702            expr.replace("{field}", &escaped_field)
703                .replace("{value}", &value_str),
704        ));
705    }
706
707    // Wildcard match fallback
708    if value.contains_wildcards()
709        && let Some(expr) = cfg.wildcard_match_expression
710    {
711        return Ok(ConvertResult::Query(
712            expr.replace("{field}", &escaped_field)
713                .replace("{value}", &value_str),
714        ));
715    }
716
717    // Exact match (default)
718    let result = if let Some(expr) = cfg.eq_expression {
719        expr.replace("{field}", &escaped_field)
720            .replace("{value}", &value_str)
721    } else {
722        format!("{escaped_field}{}{value_str}", cfg.eq_token)
723    };
724    Ok(ConvertResult::Query(result))
725}