# rsigma 0.18.0 - Docs.rs

# yaml-language-server: $schema=https://timescale.github.io/rsigma/rsigma.schema.json
#
# rsigma configuration file.
#
# Precedence (low to high):
#   compiled defaults < /etc/rsigma < ~/.config/rsigma < .rsigmarc < ./rsigma.yaml < env < CLI flags
#
# Discovery: place this at ./rsigma.yaml, ~/.config/rsigma/config.yaml, or
# /etc/rsigma/config.yaml, or point at it with `--config <path>`.
# Regenerate the JSON Schema with: rsigma config schema
version: 1  # NOTE(review): presumably the version of this config file format; confirm.

global:
  # Format of diagnostic logs written to stderr: text | json.
  # Maps to the --log-format CLI flag.
  log_format: text
  # Color policy for human-friendly output: auto | always | never.
  # `auto` honors NO_COLOR and turns color off when stdout is not a TTY.
  # color: auto
  # Default structured output format: json | ndjson | table | csv | tsv.
  # When left unset: pretty JSON on a TTY, NDJSON when piped.
  # output_format: json

daemon:
  # Sigma rules to load: path to a single rule file or to a directory.
  rules: /etc/rsigma/rules
  # Pipelines, each given as a builtin name (ecs_windows, sysmon) or as a
  # path to a YAML file. The list is empty in this template.
  pipelines: []
  # External dynamic-source files or directories (repeatable). The list is
  # empty in this template.
  sources: []
  # Post-evaluation enricher config file (commented out in this template).
  # enrichers: /etc/rsigma/enrichers.yml
  # Alert-pipeline config file (dedup, grouping, silencing, inhibition).
  # alert_pipeline: /etc/rsigma/alert-pipeline.yml
  # Risk-based alerting config file (annotation + per-entity risk incidents).
  # risk: /etc/rsigma/risk.yml

  api:
    # Bind address (host:port) for health, metrics, and the HTTP/OTLP API.
    # The 0.0.0.0 host listens on every IPv4 interface of the machine.
    addr: "0.0.0.0:9090"
    # TLS settings (ignored unless built with the daemon-tls feature).
    # Uncomment `tls:` together with the keys you need.
    # tls:
    #   cert: /etc/rsigma/tls/cert.pem
    #   key: /etc/rsigma/tls/key.pem
    #   client_ca: /etc/rsigma/tls/ca.pem
    #   min_version: "1.3"
    #   allow_plaintext: false

  input:
    # Event source: stdin | http | nats://host:port/subject
    source: stdin
    # Log format of incoming events: auto | json | syslog | plain | logfmt | cef
    format: auto
    # Default timezone offset for RFC 3164 syslog.
    syslog_tz: "+00:00"
    # Strip a leading UTF-8 BOM from RFC 5424 syslog messages (RFC 5424 treats
    # it as an encoding marker, not content). Set false to keep it byte-for-byte.
    syslog_strip_bom: true
    # Channel capacity for the source->engine and engine->sink queues.
    buffer_size: 10000
    # Max events processed per engine lock acquisition.
    batch_size: 1
    # Extract the event payload with either a jq filter or a JSONPath query.
    # The two options are mutually exclusive: set at most one of them.
    # jq: ".event"
    # jsonpath: "$.event"

  output:
    # Detection sinks (repeatable): stdout | file://path | nats://host:port/subject
    # | otlp(s)://host:port (gRPC) | otlphttp(s)://host:port (HTTP); the `s`
    # variants use TLS.
    # Optional query suffixes on a sink URL:
    #   ?on_full=drop       best-effort delivery
    #   ?compression=gzip   OTLP
    #   ?ca=, ?client_cert=, ?client_key=
    #                       TLS; PEM paths (the last two enable mutual TLS)
    #   ?tls_domain=        TLS; SNI
    sinks: [stdout]
    # Dead-letter queue for events that fail processing.
    # dlq: "file:///var/lib/rsigma/dlq.ndjson"
    # Seconds to wait for in-flight events to drain on shutdown.
    drain_timeout: 5
    # Include the full event JSON in each detection.
    include_event: false
    # Pretty-print JSON output.
    pretty: false
    #
    # Async delivery tuning (the five keys below), shared by every sink.
    # The per-sink queue depth follows input.buffer_size; append ?on_full=drop
    # to a sink URL for best-effort (lossy) delivery instead of backpressure.
    #
    # Max delivery retries per sink before a result is routed to the DLQ.
    retry_max: 3
    # Base and ceiling backoff (milliseconds) for delivery retries.
    backoff_base_ms: 100
    backoff_max_ms: 5000
    # batch_max: max results per delivery batch.
    # batch_flush_ms: max milliseconds a partial batch waits.
    batch_max: 64
    batch_flush_ms: 50
    # Webhook config files or directories (repeatable; maps to --webhook).
    # Each declares template-driven HTTP sinks; see the webhooks guide.
    # webhooks:
    #   - /etc/rsigma/webhooks/slack.yaml

  correlation:
    # Suppression window for correlation alerts, as a duration (e.g. 5m, 1h, 30s).
    # suppress: 5m
    # Action after a correlation fires: alert | reset
    action: alert
    # Correlation event inclusion: none | full | refs
    event_mode: none
    # Max events stored per correlation window group.
    max_events: 10
    # Hard cap on correlation state entries, counted across all correlations
    # and group keys; once the cap is reached the stalest entries are evicted.
    max_state_entries: 100000
    # Cap on retained entries within a single group's window state.
    # Unset = unbounded.
    # max_group_entries: 10000
    # Extra event field names for timestamp extraction.
    # timestamp_fields: ["@timestamp"]
    # Behavior when no timestamp field is found: wallclock | skip
    timestamp_fallback: wallclock
    # Suppress detection output for correlation-only rules.
    no_detections: false

  state:
    # SQLite database for persisting correlation state across restarts.
    # Commented out in this template, so no database path is set.
    # db: /var/lib/rsigma/state.db
    # Seconds between periodic state snapshots.
    save_interval: 30

  engine:
    # Enable bloom-filter pre-filtering of positive substring matchers.
    bloom_prefilter: false
    # Match-detail verbosity for detection output: off (default), summary, full.
    # summary adds the matcher kind/selection (and keyword/absence matches);
    # full also records the matched pattern. off keeps the {field, value} shape.
    # Keep "off" quoted: YAML 1.1 loaders and linters read a bare `off` as the
    # boolean false rather than as the string this setting lists.
    match_detail: "off"
    # Memory budget (bytes) for the bloom index. No effect unless bloom_prefilter.
    # bloom_max_bytes: 1048576
    # Observe event field keys for coverage reporting.
    observe_fields: false
    # Hard ceiling on distinct field names tracked by the observer.
    observe_fields_max_keys: 10000
    # Allow include directives to reference remote (HTTP/NATS) sources.
    allow_remote_include: false
    # Enable the cross-rule Aho-Corasick pre-filter (daachorse-index feature).
    # cross_rule_ac: false
    # HTTP egress policy applied to dynamic-source and enrichment HTTP clients.
    # default    = block link-local + cloud-metadata (SSRF defense, allow loopback / private).
    # strict     = also block loopback + RFC1918 private (recommended for hardened deployments).
    # permissive = allow every resolved address (only for tightly controlled environments).
    egress_policy: default

  # Live event tap (GET /api/v1/tap). Records a bounded window of the live
  # event stream as a replayable NDJSON fixture. Opt-in: disabled by default
  # because it can exfiltrate raw event traffic. Enable it (here or with
  # --enable-tap) only behind mTLS. Apart from `enabled`, the keys below are
  # tuning keys that can only be set in the config file.
  tap:
    # Accept tap sessions. Disabled by default; set true (or pass --enable-tap)
    # to enable.
    enabled: false
    # Per-session bounded buffer, in events. A full buffer drops events
    # (counted) rather than ever applying backpressure to the engine.
    buffer_events: 8192
    # Maximum concurrent capture sessions (a session over the cap gets 409).
    max_sessions: 2
    # Largest accepted capture window (a longer ?duration gets 400).
    max_duration: 5m

  # Live detection tail (GET /api/v1/detections/stream). Streams detection
  # results (not raw events) as NDJSON. Opt-in: disabled by default, enabled
  # here or with --enable-tail. Apart from `enabled`, the keys below are
  # tuning keys that can only be set in the config file.
  tail:
    # Accept tail sessions. Disabled by default; set true (or pass
    # --enable-tail) to enable.
    enabled: false
    # Per-session bounded buffer. A full buffer drops detections (counted)
    # rather than ever applying backpressure to the sink task.
    buffer_events: 8192
    # Maximum concurrent tail sessions (a session over the cap gets 409).
    max_sessions: 2

  # Triage feedback loop (POST/GET /api/v1/dispositions). Ingests analyst
  # verdicts (true_positive, false_positive, benign_true_positive) and computes
  # a per-rule false-positive ratio (rsigma_rule_false_positive_ratio). Opt-in:
  # disabled by default, enabled here or with --enable-dispositions.
  dispositions:
    # Accept disposition requests and compute the ratio. Disabled by default.
    enabled: false
    # Optional pull source for dispositions (a file, HTTP, or NATS sources file
    # whose payload is NDJSON or a JSON array). Unset means endpoint-only ingest.
    # source: /etc/rsigma/dispositions-source.yml
    # Rolling window over which dispositions are counted, as a duration.
    window: 30d
    # What counts toward the ratio numerator:
    #   fp_only (default) = false positives only
    #   fp_and_btp        = false positives and benign true positives
    numerator: fp_only
    # Minimum dispositions a rule needs in the window before its ratio is
    # published, so a single false positive cannot publish a misleading 100%.
    min_sample: 5

  # Schema classification and routing. Opt-in. `observe` counts events per
  # recognized schema (and unknowns) for the /api/v1/schemas endpoint and the
  # rsigma_events_by_schema_total metric. `routing` recognizes each event's
  # schema and dispatches it to the pipeline-set bound to that schema in the
  # `routing:` section of `config`, with one shared correlation store across
  # schemas. `config` adds user schema signatures and routing bindings on top
  # of the built-ins. `on_unknown` is the policy for events that match no
  # schema: warn (default), drop, passthrough, or error.
  schema:
    # Count events per recognized schema (and unknowns).
    observe: false
    # Dispatch each event to the pipeline-set bound to its schema.
    routing: false
    # File with user schema signatures and routing bindings.
    # config: ./schema.yml
    # Policy for events that match no schema: warn | drop | passthrough | error.
    on_unknown: warn

  # Logsource-aware evaluation. Opt-in, conflict-based pruning: skip a rule
  # only when its product/service/category is set on both the rule and the
  # event and they differ, so a `product: windows` event skips `product: linux`
  # rules without dropping Windows-category or logsource-less rules. `field_map`
  # remaps the event field names each dimension is read from (defaults
  # product/service/category). `event_logsource` is a static logsource applied
  # when the field is absent, for a single-source pipeline. Fail-open: an event
  # with no extractable logsource is evaluated against every rule.
  logsource_routing:
    # Turn conflict-based rule pruning on.
    enabled: false
    # Event field each logsource dimension is read from.
    # field_map:
    #   product: product
    #   service: service
    #   category: category
    # Static logsource applied when the event field is absent.
    # event_logsource:
    #   product: windows
    # NOTE(review): `strict` is not described in this file; presumably it
    # tightens the fail-open behavior described above - confirm.
    strict: false

  # NATS secrets (creds/token/password/nkey) are NOT configurable here by
  # design; supply them via environment variables. This section is ignored
  # unless rsigma is built with the daemon-nats feature.
  # nats:
  #   consumer_group: rsigma

eval:
  # Default rules path for `rsigma engine eval`.
  # rules: ./rules
  # pipelines: [sysmon]
  # NOTE(review): presumably the input log format, with the same values as
  # daemon.input.format (auto | json | syslog | plain | logfmt | cef) - confirm.
  input_format: auto
  # NOTE(review): presumably the default timezone offset for RFC 3164 syslog,
  # as for daemon.input.syslog_tz - confirm.
  syslog_tz: "+00:00"
  # Strip a leading UTF-8 BOM from RFC 5424 syslog messages. Set false to keep it.
  syslog_strip_bom: true
  # NOTE(review): not described in this file; the name suggests a failing
  # exit status when any rule fires - confirm.
  fail_on_detection: false
  # Schema routing for `engine eval` (see daemon.schema for details). `observe`
  # has no effect here; eval has no schema observer.
  schema:
    routing: false
    # config: ./schema.yml
    on_unknown: warn
  # Logsource-aware evaluation for `engine eval` (see daemon.logsource_routing).
  logsource_routing:
    enabled: false
    # field_map:
    #   product: product
    # event_logsource:
    #   product: windows
    strict: false

backtest:
  # Default rules path for `rsigma rule backtest`.
  # rules: ./rules
  # Event corpus file(s) or directory(ies), walked recursively.
  # corpus: [./corpus]
  # Expectations YAML (per-rule fire-count assertions).
  # expectations: ./expectations.yml
  # Policy for a rule that fires with no covering expectation: fail | warn | ignore.
  # Left unset here so the expectations-file default applies; the CLI flag
  # overrides both.
  # unexpected: warn
  # pipelines: [sysmon]
  # Input log format for non-NDJSON corpus files.
  input_format: auto
  # NOTE(review): presumably the default timezone offset for RFC 3164 syslog,
  # as for daemon.input.syslog_tz - confirm.
  syslog_tz: "+00:00"
  # Strip a leading UTF-8 BOM from RFC 5424 syslog messages. Set false to keep it.
  syslog_strip_bom: true

coverage:
  # Every key in this section is commented out, so as written `coverage` has
  # no value (YAML null). Uncomment a key below to set it.
  #
  # Default rule file(s) or directory(ies) to map onto ATT&CK (repeatable).
  # rules: [./rules]
  # Cross-reference against the Atomic Red Team index: a local index.yaml, an
  # atomic-red-team `atomics/` directory, or a URL.
  # atomics: https://raw.githubusercontent.com/redcanaryco/atomic-red-team/master/atomics/Indexes/index.yaml
  # Baseline ATT&CK Navigator layer (local path or URL); e.g. the SigmaHQ heatmap.
  # baseline: https://raw.githubusercontent.com/SigmaHQ/sigma/master/other/sigma_attack_nav_coverage.json
  # Target technique list (one technique ID per line; `#` comments allowed).
  # targets: ./threat-model-techniques.txt
  # Exit non-zero when a requested cross-reference reports uncovered techniques.
  # fail_on_gaps: false

scorecard:
  # `rule scorecard` fuses the backtest and coverage JSON reports into per-rule
  # keep/tune/retire verdicts. The two reports are required; supply them on the
  # command line (--backtest/--coverage) or here.
  # backtest: ./backtest.json
  # coverage: ./coverage.json
  # Prometheus exposition snapshot or /metrics URL for production fire volume.
  # metrics: http://localhost:9090/metrics
  # When metrics is a Prometheus query-API base, range window for last-fired.
  # metrics_window: 7d
  # Triage disposition feed for the live false-positive ratio and MTTD/MTTR.
  # triage: ./triage.json
  # Program-artifact output path (.md or .html).
  # report: ./scorecard.md
  # CI policy: fail (exit 1) on verdicts at or worse than the given level.
  # Values: none | tune | retire.
  fail_on: none
  # Verdict thresholds (SOC quality-metrics defaults).
  min_precision: 0.8
  tune_max_precision: 0.5
  retire_max_precision: 0.1
  # Minimum total volume for a keep verdict.
  min_volume: 1
  # Staleness window in days (enforced only when last-fired is known).
  stale_window: 30
  # Live false-positive-ratio ceiling.
  max_fp_ratio: 0.5

visibility:
  # Every key in this section is commented out, so as written `visibility`
  # has no value (YAML null). Uncomment a key below to set it.
  #
  # Logsource/field to ATT&CK data-source mapping table (local path or URL).
  # Unset uses the bundled default table; a bare `--mapping` flag uses the
  # curated default URL.
  # mapping: ./mapping.json
  # Exit non-zero when a rule-expected data source has no observed telemetry
  # (every mapped field sits in the broken-coverage `missing` set).
  # fail_on_blind_spots: false

doc:
  # `rule doc` reports or scaffolds the ADS detection-strategy document. The ADS
  # bar (enforced statuses and required sections) is not configured here: it
  # lives in .rsigma-lint.yml under an `ads:` block.
  #
  # Exit non-zero when any rule falls below the configured ADS bar.
  fail_on_missing: false

hygiene:
  # `rule hygiene` flags retirement candidates: silence, noise, untagged, no
  # owner, incomplete ADS, broken field coverage, and deprecated/stale status.
  #
  # Default rule file(s) or directory(ies) to report on (repeatable).
  # rules: [./rules]
  # Prometheus exposition snapshot or /metrics URL for per-rule fire volume
  # (drives the silence and noisy signals).
  # metrics: http://localhost:9090/metrics
  # When metrics is a Prometheus query-API base, range window for last-fired.
  # metrics_window: 7d
  # A field-observability JSON snapshot (rsigma engine eval --observe-fields, or
  # the daemon's /api/v1/fields) for the broken-coverage signal.
  # fields: ./fields.json
  # Age past which a never-fired rule is a retirement candidate (e.g. 365d).
  silent_threshold: 365d
  # Modified-date age past which a rule is flagged stale (e.g. 365d).
  stale_threshold: 365d
  # Absolute per-window fire ceiling that overrides the robust outlier test.
  # noisy_threshold: 100000
  # CI policy (repeatable): silent | noisy | untagged | no-owner | incomplete-ads
  # | broken-fields | deprecated | any. Exit 1 when a selected condition matches.
  # fail_on: [silent]

# `rsigma mcp serve` settings. The auth token is NOT configurable here by
# design; supply it via --auth-token or RSIGMA_MCP_AUTH_TOKEN.
# Every key in this section is commented out, so as written `mcp` has no
# value (YAML null). Uncomment a key below to set it.
mcp:
  # Bind address for the Streamable HTTP transport (`--http`). Unset = stdio.
  # http_addr: 127.0.0.1:9100
  # Lint config file applied by the lint_rules tool (`--lint-config`).
  # lint_config: .rsigma-lint.yml
  # Default root for relative path-based tool calls (`--rules-dir`).
  # rules_dir: ./rules