Skip to main content

rsigma_convert/
backend.rs

1use std::collections::HashMap;
2use std::sync::Mutex;
3
4use rsigma_eval::pipeline::state::PipelineState;
5use rsigma_parser::*;
6
7use crate::error::{ConvertError, Result};
8use crate::state::{ConversionState, ConvertResult};
9
10/// Process-wide cache for compiled regexes keyed by pattern string.
11static REGEX_CACHE: Mutex<Option<HashMap<&'static str, regex::Regex>>> = Mutex::new(None);
12
13fn get_cached_regex(pattern: &'static str) -> Option<regex::Regex> {
14    let mut guard = REGEX_CACHE.lock().unwrap();
15    let cache = guard.get_or_insert_with(HashMap::new);
16    if let Some(re) = cache.get(pattern) {
17        return Some(re.clone());
18    }
19    match regex::Regex::new(pattern) {
20        Ok(re) => {
21            cache.insert(pattern, re.clone());
22            Some(re)
23        }
24        Err(_) => None,
25    }
26}
27
28// =============================================================================
29// Token precedence
30// =============================================================================
31
/// Boolean operator token type, used for precedence-aware grouping.
///
/// The derived `Ord` follows the explicit discriminants (`NOT < AND < OR`).
/// `TextQueryConfig::needs_grouping` does not use that ordering; it ranks
/// tokens by their position in `TextQueryConfig::precedence`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum TokenType {
    /// Logical negation. Highest precedence (binds tightest).
    NOT = 0,
    /// Logical conjunction.
    AND = 1,
    /// Logical disjunction.
    OR = 2,
}
40
41// =============================================================================
42// Backend trait
43// =============================================================================
44
/// Core conversion trait.
///
/// Backends implement this to convert parsed Sigma AST nodes into
/// backend-native query strings. The trait operates on **parsed** types
/// from `rsigma-parser` because conversion needs the original field names,
/// modifiers, and values rather than compiled matchers.
///
/// Methods declared without a body are required. Methods with a body are
/// optional hooks whose defaults either return a fixed value or report the
/// feature as unsupported.
pub trait Backend: Send + Sync {
    /// Name of this backend.
    fn name(&self) -> &str;

    /// Output formats offered by this backend, as pairs of strings.
    ///
    /// NOTE(review): presumably `(name, description)` pairs, like
    /// [`correlation_methods`](Backend::correlation_methods) — confirm against
    /// an implementation.
    fn formats(&self) -> &[(&str, &str)];

    /// Default output format name; `"default"` unless overridden.
    fn default_format(&self) -> &str {
        "default"
    }

    /// Whether this backend requires a pipeline; `false` unless overridden.
    fn requires_pipeline(&self) -> bool {
        false
    }

    // --- Detection rule conversion ---

    /// Convert a detection rule for `output_format`, returning the resulting
    /// strings.
    fn convert_rule(
        &self,
        rule: &SigmaRule,
        output_format: &str,
        pipeline_state: &PipelineState,
    ) -> Result<Vec<String>>;

    // --- Condition tree dispatch ---

    /// Convert a condition expression into a query string.
    ///
    /// `detections` is the map the expression is resolved against
    /// (presumably keyed by detection name — confirm against the caller).
    fn convert_condition(
        &self,
        expr: &ConditionExpr,
        detections: &HashMap<String, Detection>,
        state: &mut ConversionState,
    ) -> Result<String>;

    /// Combine already-converted sub-expressions with AND.
    fn convert_condition_and(&self, exprs: &[String]) -> Result<String>;
    /// Combine already-converted sub-expressions with OR.
    fn convert_condition_or(&self, exprs: &[String]) -> Result<String>;
    /// Negate an already-converted expression.
    fn convert_condition_not(&self, expr: &str) -> Result<String>;

    // --- Detection item conversion ---

    /// Convert a single detection into a query string.
    fn convert_detection(&self, det: &Detection, state: &mut ConversionState) -> Result<String>;

    /// Convert a single detection item into a query string.
    fn convert_detection_item(
        &self,
        item: &DetectionItem,
        state: &mut ConversionState,
    ) -> Result<String>;

    /// Convert an array object-scope match (`field[any]:` / `field[all]:`).
    ///
    /// `body` is evaluated against the members of the array at `field`. The
    /// default implementation reports the construct as unsupported; backends
    /// that can express member quantification (e.g. PostgreSQL JSONB via
    /// `jsonb_array_elements` + `EXISTS`) override this. Backends must fail
    /// loudly here rather than emit a query with different semantics.
    fn convert_array_match(
        &self,
        field: &str,
        quantifier: ArrayQuantifier,
        body: &Detection,
        state: &mut ConversionState,
    ) -> Result<String> {
        // Arguments are intentionally unused in the default implementation.
        let _ = (field, quantifier, body, state);
        Err(ConvertError::UnsupportedArrayMatching)
    }

    /// Whether this backend can lower a positional array index (`field[N]`) in
    /// a field path. Backends that cannot must not silently emit a literal
    /// field reference (which would diverge from the evaluator's element-`N`
    /// semantics); the default item conversion rejects indexed fields with
    /// `UnsupportedArrayMatching`. PostgreSQL overrides this for JSONB mode.
    fn supports_field_index(&self) -> bool {
        false
    }

    // --- Field/value escaping ---

    /// Escape and, where applicable, quote a field name for use in a query.
    fn escape_and_quote_field(&self, field: &str) -> String;
    /// Render a string value as query text.
    fn convert_value_str(&self, value: &SigmaString, state: &ConversionState) -> String;
    /// Render a regular-expression pattern as query text.
    fn convert_value_re(&self, regex: &str, state: &ConversionState) -> String;

    // --- Value-type-specific methods ---

    /// Convert a comparison of `field` against the string `value`, taking the
    /// item's `modifiers` into account.
    fn convert_field_eq_str(
        &self,
        field: &str,
        value: &SigmaString,
        modifiers: &[Modifier],
        state: &mut ConversionState,
    ) -> Result<ConvertResult>;

    /// Case-sensitive counterpart of
    /// [`convert_field_eq_str`](Backend::convert_field_eq_str).
    fn convert_field_eq_str_case_sensitive(
        &self,
        field: &str,
        value: &SigmaString,
        modifiers: &[Modifier],
        state: &mut ConversionState,
    ) -> Result<ConvertResult>;

    /// Convert a comparison of `field` against a numeric value.
    fn convert_field_eq_num(
        &self,
        field: &str,
        value: f64,
        state: &mut ConversionState,
    ) -> Result<String>;

    /// Convert a comparison of `field` against a boolean value.
    fn convert_field_eq_bool(
        &self,
        field: &str,
        value: bool,
        state: &mut ConversionState,
    ) -> Result<String>;

    /// Convert a null check on `field`.
    fn convert_field_eq_null(&self, field: &str, state: &mut ConversionState) -> Result<String>;

    /// Convert a regular-expression match of `pattern` against `field`.
    ///
    /// NOTE(review): `flags` is a modifier slice, presumably the regex flag
    /// modifiers attached to the item — confirm against the caller.
    fn convert_field_eq_re(
        &self,
        field: &str,
        pattern: &str,
        flags: &[Modifier],
        state: &mut ConversionState,
    ) -> Result<ConvertResult>;

    /// Convert a CIDR match of `field` against the network `cidr`.
    fn convert_field_eq_cidr(
        &self,
        field: &str,
        cidr: &str,
        state: &mut ConversionState,
    ) -> Result<ConvertResult>;

    /// Convert a numeric comparison of `field` against `value`, with the
    /// comparison operator given as the modifier `op`.
    fn convert_field_compare(
        &self,
        field: &str,
        op: &Modifier,
        value: f64,
        state: &mut ConversionState,
    ) -> Result<String>;

    /// Convert a field-existence check.
    ///
    /// NOTE(review): presumably `exists == true` means "field must be
    /// present" — confirm against an implementation.
    fn convert_field_exists(
        &self,
        field: &str,
        exists: bool,
        state: &mut ConversionState,
    ) -> Result<String>;

    /// Convert a query-expression value for `field`.
    ///
    /// NOTE(review): the meaning of `expr` and `id` is not visible in this
    /// file — see the implementations.
    fn convert_field_eq_query_expr(
        &self,
        field: &str,
        expr: &str,
        id: &str,
        state: &mut ConversionState,
    ) -> Result<String>;

    /// Convert a comparison between two fields.
    fn convert_field_ref(
        &self,
        field1: &str,
        field2: &str,
        state: &mut ConversionState,
    ) -> Result<ConvertResult>;

    /// Convert a keyword value, i.e. a value that is not bound to a field.
    fn convert_keyword(&self, value: &SigmaValue, state: &mut ConversionState) -> Result<String>;

    // --- IN-list optimization (optional) ---

    /// Convert several values for one field into a single IN-style expression.
    ///
    /// The default implementation returns
    /// `ConvertError::UnsupportedModifier("IN expression not supported")`.
    ///
    /// NOTE(review): `_is_or` presumably distinguishes OR-lists from
    /// AND-lists — confirm against an implementation.
    fn convert_condition_as_in_expression(
        &self,
        _field: &str,
        _values: &[&SigmaValue],
        _is_or: bool,
        _state: &mut ConversionState,
    ) -> Result<String> {
        Err(ConvertError::UnsupportedModifier(
            "IN expression not supported".into(),
        ))
    }

    // --- Query finalization ---

    /// Finish the converted `query` for `rule` using the conversion `state`.
    fn finish_query(
        &self,
        rule: &SigmaRule,
        query: String,
        state: &ConversionState,
    ) -> Result<String>;

    /// Finalize one query of `rule` for `output_format`.
    ///
    /// NOTE(review): `index` is presumably the position of `query` among the
    /// rule's queries — confirm against the caller.
    fn finalize_query(
        &self,
        rule: &SigmaRule,
        query: String,
        index: usize,
        state: &ConversionState,
        output_format: &str,
    ) -> Result<String>;

    /// Combine all finalized queries into one output string for
    /// `output_format`.
    fn finalize_output(&self, queries: Vec<String>, output_format: &str) -> Result<String>;

    /// File extension (no leading dot) for per-rule output files when
    /// `rsigma backend convert` writes one file per rule into a directory.
    ///
    /// The default is `txt`; backends override it so the split files land
    /// with the extension their target loader expects (`sql` for
    /// PostgreSQL, `yml` for Fibratus rule YAML). The `output_format`
    /// argument lets a backend pick a different extension per format (e.g.
    /// Fibratus emits `.txt` for the bare-expression `expr` format and
    /// `.yml` for the YAML rule envelope).
    fn output_file_extension(&self, _output_format: &str) -> &str {
        "txt"
    }

    // --- Correlation rule conversion (optional) ---

    /// Whether this backend can convert correlation rules; `false` unless
    /// overridden.
    fn supports_correlation(&self) -> bool {
        false
    }

    /// Correlation generation methods this backend offers, as
    /// `(name, description)` pairs, mirroring pySigma's `correlation_methods`.
    ///
    /// The converting user selects one with the `correlation_method` backend
    /// option, which overrides a rule's own `window` hint for that conversion.
    /// An empty slice (the default) means the backend exposes no per-conversion
    /// choice.
    fn correlation_methods(&self) -> &[(&str, &str)] {
        &[]
    }

    /// The correlation method used when the converting user selects none.
    fn default_correlation_method(&self) -> &str {
        "default"
    }

    /// Convert a correlation rule, discarding any non-fatal warnings.
    ///
    /// Convenience wrapper over [`convert_correlation_rule_with_warnings`]; the
    /// `convert_collection` entry point uses the warnings-aware form so it can
    /// surface diagnostics. Backends should override the warnings-aware method,
    /// not this one.
    ///
    /// [`convert_correlation_rule_with_warnings`]: Backend::convert_correlation_rule_with_warnings
    fn convert_correlation_rule(
        &self,
        rule: &CorrelationRule,
        output_format: &str,
        pipeline_state: &PipelineState,
    ) -> Result<Vec<String>> {
        // Scratch buffer: collected warnings are dropped when this returns.
        let mut warnings = Vec::new();
        self.convert_correlation_rule_with_warnings(
            rule,
            output_format,
            pipeline_state,
            &mut warnings,
        )
    }

    /// Convert a correlation rule, appending any non-fatal diagnostics to
    /// `warnings`.
    ///
    /// A backend pushes a warning when it can only approximate a requested
    /// feature but still emits a usable query (the Sigma "should warn" case),
    /// and returns [`ConvertError`] when a feature cannot be represented at all
    /// (the "must error" case).
    ///
    /// The default implementation always returns
    /// `ConvertError::UnsupportedCorrelation`.
    fn convert_correlation_rule_with_warnings(
        &self,
        _rule: &CorrelationRule,
        _output_format: &str,
        _pipeline_state: &PipelineState,
        _warnings: &mut Vec<String>,
    ) -> Result<Vec<String>> {
        Err(ConvertError::UnsupportedCorrelation(
            "correlation rules not supported by this backend".into(),
        ))
    }
}
320
321// =============================================================================
322// TextQueryConfig
323// =============================================================================
324
/// Configuration tokens for text-based query backends.
///
/// Mirrors pySigma's `TextQueryBackend` class variables. Backends create a
/// `const` or `static` instance of this struct and delegate to the
/// `text_convert_*` free functions for the default conversion logic.
///
/// Fields named `*_expression` are templates. The free functions in this file
/// fill them by plain text replacement of placeholders such as `{field}`,
/// `{value}`, `{expr}` and `{query}`. Fields without a doc comment are not
/// read by the functions in this file.
pub struct TextQueryConfig {
    // --- Precedence and grouping ---
    /// Operator tokens ordered from tightest-binding to loosest-binding; read
    /// by `needs_grouping`.
    pub precedence: (TokenType, TokenType, TokenType),
    /// Template used to group a sub-expression; `{expr}` is replaced by it.
    pub group_expression: &'static str,
    /// Text placed between an operator token and its operands.
    pub token_separator: &'static str,

    // --- Boolean operators ---
    /// AND operator. When empty, operands are joined by `token_separator` alone.
    pub and_token: &'static str,
    /// OR operator.
    pub or_token: &'static str,
    /// NOT operator, emitted before the negated expression.
    pub not_token: &'static str,
    /// Equality operator, used for exact matches when `eq_expression` is `None`.
    pub eq_token: &'static str,

    // --- Negation expressions ---
    pub not_eq_token: Option<&'static str>,
    /// Exact-match template with `{field}` and `{value}`; takes priority over
    /// `eq_token` when set.
    pub eq_expression: Option<&'static str>,
    pub not_eq_expression: Option<&'static str>,
    pub convert_not_as_not_eq: bool,

    // --- Wildcards ---
    /// Output text for a multi-character wildcard in a string value.
    pub wildcard_multi: &'static str,
    /// Output text for a single-character wildcard in a string value.
    pub wildcard_single: &'static str,

    // --- String quoting and escaping ---
    /// Quote placed on both sides of a string value that gets quoted; an
    /// occurrence inside the value is escaped with `escape_char`.
    pub str_quote: &'static str,
    /// Optional regex deciding whether a wildcard-free value is quoted. With
    /// `None`, such values are always quoted.
    pub str_quote_pattern: Option<&'static str>,
    /// Inverts the `str_quote_pattern` decision: quote when the pattern does
    /// NOT match.
    pub str_quote_pattern_negation: bool,
    /// Escape string for string values; also escaped itself when it occurs in
    /// the value.
    pub escape_char: &'static str,
    /// Additional single characters that are escaped in string values.
    pub add_escaped: &'static [&'static str],
    /// Single characters that are dropped from string values.
    pub filter_chars: &'static [&'static str],

    // --- Field name quoting and escaping ---
    /// Quote placed on both sides of a field name that gets quoted.
    pub field_quote: Option<&'static str>,
    /// Optional regex deciding whether a field name is quoted. With `None`,
    /// field names are always quoted (if `field_quote` is set).
    pub field_quote_pattern: Option<&'static str>,
    /// Inverts the `field_quote_pattern` decision: quote when the pattern does
    /// NOT match.
    pub field_quote_pattern_negation: bool,
    /// Escape string applied to the parts of a field name matched by
    /// `field_escape_pattern`; both must be set for escaping to happen.
    pub field_escape: Option<&'static str>,
    /// Regex selecting the parts of a field name that need escaping.
    pub field_escape_pattern: Option<&'static str>,

    // --- String match expressions ---
    /// Template (`{field}`, `{value}`) for the `startswith` modifier.
    pub startswith_expression: Option<&'static str>,
    pub not_startswith_expression: Option<&'static str>,
    pub startswith_expression_allow_special: bool,
    /// Template (`{field}`, `{value}`) for the `endswith` modifier.
    pub endswith_expression: Option<&'static str>,
    pub not_endswith_expression: Option<&'static str>,
    pub endswith_expression_allow_special: bool,
    /// Template (`{field}`, `{value}`) for the `contains` modifier.
    pub contains_expression: Option<&'static str>,
    pub not_contains_expression: Option<&'static str>,
    pub contains_expression_allow_special: bool,
    /// Template (`{field}`, `{value}`) for values containing wildcards when no
    /// modifier-specific template applied.
    pub wildcard_match_expression: Option<&'static str>,

    // --- Case-sensitive match expressions ---
    /// Template (`{field}`, `{value}`) for a `cased` match when no more
    /// specific case-sensitive template applied.
    pub case_sensitive_match_expression: Option<&'static str>,
    /// Template (`{field}`, `{value}`) for `cased` + `startswith`.
    pub case_sensitive_startswith_expression: Option<&'static str>,
    /// Template (`{field}`, `{value}`) for `cased` + `endswith`.
    pub case_sensitive_endswith_expression: Option<&'static str>,
    /// Template (`{field}`, `{value}`) for `cased` + `contains`.
    pub case_sensitive_contains_expression: Option<&'static str>,

    // --- Regex ---
    pub re_expression: Option<&'static str>,
    pub not_re_expression: Option<&'static str>,
    /// Escape string prefixed to every `re_escape` entry found in a regex;
    /// with `None`, regexes are passed through unchanged.
    pub re_escape_char: Option<&'static str>,
    /// Substrings of a regex that get prefixed with `re_escape_char`.
    pub re_escape: &'static [&'static str],
    /// Escape string prefixed to existing occurrences of `re_escape_char`;
    /// applied before the `re_escape` entries are handled.
    pub re_escape_escape_char: Option<&'static str>,

    // --- CIDR ---
    pub cidr_expression: Option<&'static str>,
    pub not_cidr_expression: Option<&'static str>,

    // --- Null / field existence ---
    pub field_null_expression: &'static str,
    pub field_exists_expression: Option<&'static str>,
    pub field_not_exists_expression: Option<&'static str>,

    // --- Compare operators ---
    pub compare_op_expression: Option<&'static str>,
    pub compare_ops: &'static [(&'static str, &'static str)],

    // --- IN-list optimization ---
    pub convert_or_as_in: bool,
    pub convert_and_as_in: bool,
    pub in_expressions_allow_wildcards: bool,
    pub field_in_list_expression: Option<&'static str>,
    pub or_in_operator: Option<&'static str>,
    pub and_in_operator: Option<&'static str>,
    pub list_separator: &'static str,

    // --- Unbound/keyword ---
    pub unbound_value_str_expression: Option<&'static str>,
    pub unbound_value_num_expression: Option<&'static str>,
    pub unbound_value_re_expression: Option<&'static str>,

    // --- Field-to-field comparison ---
    pub field_eq_field_expression: Option<&'static str>,
    pub field_eq_field_escaping_quoting: bool,

    // --- Deferred query parts ---
    /// Text inserted between the main query and the first deferred part
    /// (empty when `None`).
    pub deferred_start: Option<&'static str>,
    /// Text inserted between consecutive deferred parts (empty when `None`).
    pub deferred_separator: Option<&'static str>,
    /// Stand-in main query used when the main query is empty but deferred
    /// parts exist.
    pub deferred_only_query: &'static str,

    // --- Bool values ---
    pub bool_true: &'static str,
    pub bool_false: &'static str,

    // --- Query envelope ---
    /// Template for the finished query. `{query}` is replaced by the main
    /// query; `{key}` placeholders are filled from the conversion state and
    /// `state_defaults`; `{rule.title}` and `{rule.id}` from the rule.
    pub query_expression: &'static str,
    /// `(key, value)` pairs substituted for `{key}` placeholders in
    /// `query_expression`.
    pub state_defaults: &'static [(&'static str, &'static str)],
}
436
437impl TextQueryConfig {
438    /// Check if `inner` needs parenthesisation when nested inside `outer`.
439    pub fn needs_grouping(&self, outer: TokenType, inner: TokenType) -> bool {
440        let rank = |t: TokenType| -> u8 {
441            if t == self.precedence.0 {
442                0
443            } else if t == self.precedence.1 {
444                1
445            } else {
446                2
447            }
448        };
449        rank(inner) > rank(outer)
450    }
451}
452
453// =============================================================================
454// Text-backend free functions
455// =============================================================================
456
457/// Escape and optionally quote a field name according to the config.
458pub fn text_escape_and_quote_field(cfg: &TextQueryConfig, field: &str) -> String {
459    let mut escaped = field.to_string();
460
461    if let Some(esc) = cfg.field_escape
462        && let Some(pat) = cfg.field_escape_pattern
463        && let Some(re) = get_cached_regex(pat)
464    {
465        escaped = re
466            .replace_all(&escaped, |_: &regex::Captures| esc)
467            .to_string();
468    }
469
470    if let Some(quote) = cfg.field_quote {
471        let should_quote = match cfg.field_quote_pattern {
472            Some(pat) => {
473                let matches = get_cached_regex(pat)
474                    .map(|re| re.is_match(&escaped))
475                    .unwrap_or(false);
476                if cfg.field_quote_pattern_negation {
477                    !matches
478                } else {
479                    matches
480                }
481            }
482            None => true,
483        };
484        if should_quote {
485            return format!("{quote}{escaped}{quote}");
486        }
487    }
488
489    escaped
490}
491
492/// Convert a `SigmaString` to its text representation, applying escaping and quoting.
493pub fn text_convert_value_str(cfg: &TextQueryConfig, value: &SigmaString) -> String {
494    let mut result = String::new();
495    let mut has_wildcards = false;
496
497    for part in &value.parts {
498        match part {
499            StringPart::Plain(s) => {
500                let mut escaped = String::with_capacity(s.len());
501                for ch in s.chars() {
502                    let ch_str = ch.to_string();
503                    if cfg.filter_chars.contains(&ch_str.as_str()) {
504                        continue;
505                    }
506                    if ch_str == cfg.escape_char
507                        || ch_str == cfg.str_quote
508                        || cfg.add_escaped.contains(&ch_str.as_str())
509                    {
510                        escaped.push_str(cfg.escape_char);
511                    }
512                    escaped.push(ch);
513                }
514                result.push_str(&escaped);
515            }
516            StringPart::Special(SpecialChar::WildcardMulti) => {
517                result.push_str(cfg.wildcard_multi);
518                has_wildcards = true;
519            }
520            StringPart::Special(SpecialChar::WildcardSingle) => {
521                result.push_str(cfg.wildcard_single);
522                has_wildcards = true;
523            }
524        }
525    }
526
527    if !has_wildcards {
528        let should_quote = match cfg.str_quote_pattern {
529            Some(pat) => {
530                let matches = get_cached_regex(pat)
531                    .map(|re| re.is_match(&result))
532                    .unwrap_or(false);
533                if cfg.str_quote_pattern_negation {
534                    !matches
535                } else {
536                    matches
537                }
538            }
539            None => true,
540        };
541        if should_quote {
542            return format!("{}{result}{}", cfg.str_quote, cfg.str_quote);
543        }
544    }
545
546    result
547}
548
549/// Escape a regex pattern according to the config.
550pub fn text_convert_value_re(cfg: &TextQueryConfig, regex_str: &str) -> String {
551    let mut result = regex_str.to_string();
552
553    if let Some(esc_esc) = cfg.re_escape_escape_char
554        && let Some(esc) = cfg.re_escape_char
555    {
556        result = result.replace(esc, &format!("{esc_esc}{esc}"));
557    }
558
559    if let Some(esc) = cfg.re_escape_char {
560        for pattern in cfg.re_escape {
561            result = result.replace(pattern, &format!("{esc}{pattern}"));
562        }
563    }
564
565    result
566}
567
568/// Precedence-aware grouping.
569pub fn text_convert_condition_group(
570    cfg: &TextQueryConfig,
571    expr: &str,
572    outer: TokenType,
573    inner: TokenType,
574) -> String {
575    if cfg.needs_grouping(outer, inner) {
576        cfg.group_expression.replace("{expr}", expr)
577    } else {
578        expr.to_string()
579    }
580}
581
582/// Join expressions with the AND token.
583pub fn text_convert_condition_and(cfg: &TextQueryConfig, exprs: &[String]) -> String {
584    let sep = if cfg.and_token.is_empty() {
585        cfg.token_separator.to_string()
586    } else {
587        format!(
588            "{}{}{}",
589            cfg.token_separator, cfg.and_token, cfg.token_separator
590        )
591    };
592    exprs.join(&sep)
593}
594
595/// Join expressions with the OR token.
596pub fn text_convert_condition_or(cfg: &TextQueryConfig, exprs: &[String]) -> String {
597    let sep = format!(
598        "{}{}{}",
599        cfg.token_separator, cfg.or_token, cfg.token_separator
600    );
601    exprs.join(&sep)
602}
603
604/// Negate an expression with the NOT token.
605pub fn text_convert_condition_not(cfg: &TextQueryConfig, expr: &str) -> String {
606    format!("{}{}{expr}", cfg.not_token, cfg.token_separator)
607}
608
609/// Assemble the final query from the main condition string and any deferred parts.
610pub fn text_finish_query(
611    cfg: &TextQueryConfig,
612    query: &str,
613    state: &ConversionState,
614    rule: &SigmaRule,
615) -> String {
616    let main_query = if state.has_deferred() && query.is_empty() {
617        cfg.deferred_only_query
618    } else {
619        query
620    };
621
622    let mut result = cfg.query_expression.replace("{query}", main_query);
623
624    // Substitute state defaults first, then actual state values
625    for (key, default) in cfg.state_defaults {
626        let placeholder = format!("{{{key}}}");
627        result = result.replace(&placeholder, default);
628    }
629    for (key, val) in &state.processing_state {
630        if let Some(s) = val.as_str() {
631            let placeholder = format!("{{{key}}}");
632            result = result.replace(&placeholder, s);
633        }
634    }
635
636    // Substitute rule metadata
637    result = result.replace("{rule.title}", &rule.title);
638    if let Some(id) = &rule.id {
639        result = result.replace("{rule.id}", id);
640    }
641
642    // Append deferred parts
643    if state.has_deferred() {
644        let deferred_start = cfg.deferred_start.unwrap_or("");
645        let deferred_sep = cfg.deferred_separator.unwrap_or("");
646        let parts: Vec<String> = state.deferred.iter().map(|d| d.finalize()).collect();
647        result = format!("{result}{deferred_start}{}", parts.join(deferred_sep));
648    }
649
650    result
651}
652
653/// Dispatch string matching based on modifiers and wildcard positions.
654///
655/// Returns the query fragment for a field=value comparison, handling
656/// `contains`, `startswith`, `endswith`, and wildcard patterns.
657pub fn text_convert_field_eq_str(
658    cfg: &TextQueryConfig,
659    field: &str,
660    value: &SigmaString,
661    modifiers: &[Modifier],
662    _state: &ConversionState,
663) -> Result<ConvertResult> {
664    let escaped_field = text_escape_and_quote_field(cfg, field);
665    let is_cased = modifiers.contains(&Modifier::Cased);
666    let is_contains = modifiers.contains(&Modifier::Contains);
667    let is_startswith = modifiers.contains(&Modifier::StartsWith);
668    let is_endswith = modifiers.contains(&Modifier::EndsWith);
669
670    let value_str = text_convert_value_str(cfg, value);
671
672    // Case-sensitive dispatch
673    if is_cased {
674        if is_contains && let Some(expr) = cfg.case_sensitive_contains_expression {
675            return Ok(ConvertResult::Query(
676                expr.replace("{field}", &escaped_field)
677                    .replace("{value}", &value_str),
678            ));
679        }
680        if is_startswith && let Some(expr) = cfg.case_sensitive_startswith_expression {
681            return Ok(ConvertResult::Query(
682                expr.replace("{field}", &escaped_field)
683                    .replace("{value}", &value_str),
684            ));
685        }
686        if is_endswith && let Some(expr) = cfg.case_sensitive_endswith_expression {
687            return Ok(ConvertResult::Query(
688                expr.replace("{field}", &escaped_field)
689                    .replace("{value}", &value_str),
690            ));
691        }
692        if let Some(expr) = cfg.case_sensitive_match_expression {
693            return Ok(ConvertResult::Query(
694                expr.replace("{field}", &escaped_field)
695                    .replace("{value}", &value_str),
696            ));
697        }
698    }
699
700    // Case-insensitive dispatch (default)
701    if is_contains && let Some(expr) = cfg.contains_expression {
702        return Ok(ConvertResult::Query(
703            expr.replace("{field}", &escaped_field)
704                .replace("{value}", &value_str),
705        ));
706    }
707    if is_startswith && let Some(expr) = cfg.startswith_expression {
708        return Ok(ConvertResult::Query(
709            expr.replace("{field}", &escaped_field)
710                .replace("{value}", &value_str),
711        ));
712    }
713    if is_endswith && let Some(expr) = cfg.endswith_expression {
714        return Ok(ConvertResult::Query(
715            expr.replace("{field}", &escaped_field)
716                .replace("{value}", &value_str),
717        ));
718    }
719
720    // Wildcard match fallback
721    if value.contains_wildcards()
722        && let Some(expr) = cfg.wildcard_match_expression
723    {
724        return Ok(ConvertResult::Query(
725            expr.replace("{field}", &escaped_field)
726                .replace("{value}", &value_str),
727        ));
728    }
729
730    // Exact match (default)
731    let result = if let Some(expr) = cfg.eq_expression {
732        expr.replace("{field}", &escaped_field)
733            .replace("{value}", &value_str)
734    } else {
735        format!("{escaped_field}{}{value_str}", cfg.eq_token)
736    };
737    Ok(ConvertResult::Query(result))
738}