rsigma 0.17.0

CLI for parsing, validating, linting and evaluating Sigma detection rules
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
//! Shared helpers and fixture constants for CLI integration tests.
#![allow(dead_code)]

use std::io::{BufRead, BufReader, Write};
use std::net::TcpStream;
use std::process::{Command as StdCommand, Stdio};
use std::time::{Duration, Instant};

use assert_cmd::Command;
use tempfile::NamedTempFile;

/// Build an `assert_cmd` [`Command`] that runs the `rsigma` binary.
///
/// # Panics
///
/// Panics with `binary not found` when the binary cannot be located.
// NOTE(review): the `deprecated` allow presumably covers `Command::cargo_bin`
// in the pinned assert_cmd release; confirm before removing it.
#[allow(deprecated)]
pub fn rsigma() -> Command {
    let located = Command::cargo_bin("rsigma");
    located.expect("binary not found")
}

/// Location of the freshly-built `rsigma` binary as an owned string, for
/// tests that launch it through `std::process::Command` themselves
/// (long-running daemon processes).
///
/// # Panics
///
/// Panics if the binary path is not valid UTF-8.
pub fn rsigma_bin() -> String {
    let path = assert_cmd::cargo::cargo_bin("rsigma");
    let utf8 = path.to_str().unwrap();
    String::from(utf8)
}

/// Create a temporary file whose name ends in `suffix`, fill it with
/// `contents`, flush it, and hand back the handle.
///
/// # Panics
///
/// Panics if the file cannot be created, written, or flushed.
pub fn temp_file(suffix: &str, contents: &str) -> NamedTempFile {
    let mut builder = tempfile::Builder::new();
    builder.suffix(suffix);

    let mut file = builder.tempfile().unwrap();
    file.write_all(contents.as_bytes()).unwrap();
    file.flush().unwrap();
    file
}

// ---------------------------------------------------------------------------
// Daemon HTTP process helper
// ---------------------------------------------------------------------------

/// Events the spawn handshake waits for from the daemon's stderr.
enum StartupEvent {
    /// An `API server listening` log line was seen; carries the `addr`
    /// value extracted from that line.
    ApiAddr(String),
    /// A log line containing `Sink started` was seen.
    SinkStarted,
}

/// Drop guard for a spawned `Child`: unless it has been disarmed, dropping
/// the guard kills the process and waits on it. Daemon startup uses it so a
/// panic in the middle of the handshake cannot leave a daemon running.
struct ChildGuard(Option<std::process::Child>);

impl ChildGuard {
    /// Mutable access to the guarded child.
    ///
    /// Panics with `guard already disarmed` if the child is no longer held.
    fn as_child_mut(&mut self) -> &mut std::process::Child {
        let slot = self.0.as_mut();
        slot.expect("guard already disarmed")
    }

    /// Give up ownership of the child; the guard's drop then does nothing.
    ///
    /// Panics with `guard already disarmed` if the child is no longer held.
    fn disarm(mut self) -> std::process::Child {
        let released = self.0.take();
        released.expect("guard already disarmed")
    }
}

impl Drop for ChildGuard {
    fn drop(&mut self) {
        // Nothing to clean up once the child has been handed out.
        let Some(mut process) = self.0.take() else {
            return;
        };
        // Best effort: both calls may fail if the process is already gone.
        let _ = process.kill();
        let _ = process.wait();
    }
}

/// A live `rsigma engine daemon` subprocess with a known API address. Killed and
/// reaped on drop.
pub struct DaemonProcess {
    /// Handle to the running daemon process.
    child: std::process::Child,
    /// `host:port` of the daemon's HTTP API, with a wildcard bind address
    /// rewritten to its loopback equivalent.
    api_addr: String,
}

impl DaemonProcess {
    /// Spawn `rsigma` with `args` and block until the daemon's HTTP API
    /// is actually accepting connections.
    ///
    /// The startup handshake:
    /// 1. Drain stdout in a background thread so a busy sink can never fill
    ///    the OS pipe buffer and block the daemon on its own write.
    /// 2. Read stderr in a background thread, forwarding the
    ///    `API server listening` and `Sink started` log lines over a
    ///    channel.
    /// 3. Wait for both events, with a 10s deadline.
    /// 4. Probe the listening TCP socket with `connect_timeout` in a 5s,
    ///    25ms-tick retry loop. `Sink started` is emitted just before
    ///    `axum::serve` enters its accept loop, so the log line alone is
    ///    not a sufficient readiness signal.
    ///
    /// Any panic during the handshake is caught by `ChildGuard`, which
    /// kills + waits on the daemon process so we never leak one.
    ///
    /// # Panics
    ///
    /// Panics if the process cannot be spawned, if the handshake does not
    /// complete (timeout or daemon exiting early), if the logged address
    /// does not parse, or if the API never becomes reachable.
    pub fn spawn(args: &[&str]) -> Self {
        Self::spawn_with_env(args, &[])
    }

    /// Like [`DaemonProcess::spawn`] but also sets environment variables on the
    /// daemon child. Used to exercise `${ENV_VAR}` template interpolation
    /// (e.g. webhook header secrets) without mutating the test process env.
    ///
    /// # Panics
    ///
    /// Same conditions as [`DaemonProcess::spawn`].
    pub fn spawn_with_env(args: &[&str], env: &[(&str, &str)]) -> Self {
        let child = StdCommand::new(rsigma_bin())
            .args(args)
            .envs(env.iter().copied())
            .stdin(Stdio::null())
            .stdout(Stdio::piped())
            .stderr(Stdio::piped())
            .spawn()
            .expect("failed to spawn rsigma engine daemon");
        // From here on a panic unwinds through the guard, which reaps the child.
        let mut guard = ChildGuard(Some(child));

        if let Some(stdout) = guard.as_child_mut().stdout.take() {
            std::thread::spawn(move || {
                let mut sink = std::io::sink();
                let _ = std::io::copy(&mut BufReader::new(stdout), &mut sink);
            });
        }

        let stderr = guard
            .as_child_mut()
            .stderr
            .take()
            .expect("stderr was configured as piped");
        let (tx, rx) = std::sync::mpsc::channel::<StartupEvent>();
        // The reader keeps consuming stderr after the handshake; sends simply
        // fail once the receiver is gone, and are ignored.
        std::thread::spawn(move || {
            for line in BufReader::new(stderr).lines() {
                let Ok(line) = line else { return };
                if line.contains("API server listening")
                    && let Some(addr) = extract_addr(&line)
                {
                    let _ = tx.send(StartupEvent::ApiAddr(addr));
                }
                if line.contains("Sink started") {
                    let _ = tx.send(StartupEvent::SinkStarted);
                }
            }
        });

        let mut api_addr = String::new();
        let mut sink_started = false;
        let handshake_deadline = Instant::now() + Duration::from_secs(10);
        while !sink_started || api_addr.is_empty() {
            let remaining = handshake_deadline.saturating_duration_since(Instant::now());
            match rx.recv_timeout(remaining) {
                Ok(StartupEvent::ApiAddr(addr)) => api_addr = addr,
                Ok(StartupEvent::SinkStarted) => sink_started = true,
                Err(std::sync::mpsc::RecvTimeoutError::Timeout) => panic!(
                    "daemon did not finish startup within 10s (api_addr={api_addr:?}, sink_started={sink_started})"
                ),
                // The stderr reader thread ended, so no further startup event
                // can arrive. Report that instead of a misleading timeout.
                Err(std::sync::mpsc::RecvTimeoutError::Disconnected) => {
                    let exit_status = guard.as_child_mut().try_wait().ok().flatten();
                    panic!(
                        "daemon did not finish startup: stderr closed before the handshake completed (exit_status={exit_status:?}, api_addr={api_addr:?}, sink_started={sink_started})"
                    )
                }
            }
        }

        // The daemon may log a wildcard bind address like `0.0.0.0:PORT`
        // (or `[::]:PORT`). Connecting to a wildcard address returns
        // `WSAEADDRNOTAVAIL` on Windows. Linux and macOS silently treat
        // it as loopback, so the same test was green there. Rewrite the
        // recorded address to the loopback equivalent before probing
        // and before exposing it via `url()`; the daemon listens on
        // every interface so loopback is always reachable.
        let api_addr = rewrite_wildcard_to_loopback(api_addr);

        let socket: std::net::SocketAddr = api_addr
            .parse()
            .unwrap_or_else(|e| panic!("invalid api_addr {api_addr:?}: {e}"));
        let deadline = Instant::now() + Duration::from_secs(5);
        loop {
            if TcpStream::connect_timeout(&socket, Duration::from_millis(200)).is_ok() {
                return Self {
                    child: guard.disarm(),
                    api_addr,
                };
            }
            if Instant::now() >= deadline {
                panic!("daemon API at {api_addr} never became reachable within 5s");
            }
            std::thread::sleep(Duration::from_millis(25));
        }
    }

    /// Spawn `rsigma engine daemon -r RULE --input http --api-addr 127.0.0.1:0`.
    pub fn spawn_http(rule_path: &str) -> Self {
        Self::spawn_http_with_args_env(rule_path, &[], &[])
    }

    /// Spawn the daemon in HTTP-input mode with extra CLI flags appended
    /// after the standard scaffolding (`-r`, `--input http`,
    /// `--api-addr 127.0.0.1:0`). Useful for opt-in flags like
    /// `--observe-fields` that integration tests need to exercise.
    pub fn spawn_http_with_args(rule_path: &str, extra_args: &[&str]) -> Self {
        Self::spawn_http_with_args_env(rule_path, extra_args, &[])
    }

    /// Like [`DaemonProcess::spawn_http_with_args`] but also sets environment
    /// variables on the daemon child.
    ///
    /// This is the single place that builds the HTTP-input scaffolding; the
    /// other `spawn_http*` helpers delegate here.
    pub fn spawn_http_with_args_env(
        rule_path: &str,
        extra_args: &[&str],
        env: &[(&str, &str)],
    ) -> Self {
        let mut args = vec![
            "engine",
            "daemon",
            "-r",
            rule_path,
            "--input",
            "http",
            "--api-addr",
            "127.0.0.1:0",
        ];
        args.extend_from_slice(extra_args);
        Self::spawn_with_env(&args, env)
    }

    /// Build an `http://` URL for `path` on the daemon's API address.
    /// `path` is appended verbatim.
    pub fn url(&self, path: &str) -> String {
        format!("http://{}{path}", self.api_addr)
    }

    /// Convenience constructor that returns an `https://...` URL.
    pub fn https_url(&self, path: &str) -> String {
        format!("https://{}{path}", self.api_addr)
    }

    /// The daemon's API address as `host:port`.
    pub fn api_addr(&self) -> &str {
        &self.api_addr
    }

    /// Kill the daemon and reap it. Errors are ignored because the process
    /// may already have exited.
    fn kill(&mut self) {
        let _ = self.child.kill();
        let _ = self.child.wait();
    }
}

/// Spawn `rsigma` with `args`, expecting it to fail during startup, and
/// return what it wrote to stderr (lines joined with `\n`).
///
/// Use this when a test wants to assert that a misconfigured invocation
/// (e.g. plaintext bind on `0.0.0.0` without `--allow-plaintext`) refuses
/// to start with a specific error message.
///
/// Collection stops when the process exits, when its stderr closes, or when
/// `deadline` elapses, whichever comes first. The process is then killed and
/// reaped, and any stderr lines still in flight are collected before
/// returning.
///
/// # Panics
///
/// Panics if the process cannot be spawned.
pub fn spawn_expect_failure(args: &[&str], deadline: Duration) -> String {
    let mut child = StdCommand::new(rsigma_bin())
        .args(args)
        .stdin(Stdio::null())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .expect("failed to spawn rsigma engine daemon");

    // Drain stdout so a chatty process can never block on a full pipe.
    if let Some(stdout) = child.stdout.take() {
        std::thread::spawn(move || {
            let mut sink = std::io::sink();
            let _ = std::io::copy(&mut BufReader::new(stdout), &mut sink);
        });
    }

    let stderr = child
        .stderr
        .take()
        .expect("stderr was configured as piped");
    let (tx, rx) = std::sync::mpsc::channel::<String>();
    std::thread::spawn(move || {
        for line in BufReader::new(stderr).lines() {
            let Ok(line) = line else { return };
            let _ = tx.send(line);
        }
    });

    let end = Instant::now() + deadline;
    let mut collected = Vec::new();
    loop {
        let remaining = end.saturating_duration_since(Instant::now());
        if remaining.is_zero() {
            break;
        }
        if let Ok(Some(_)) = child.try_wait() {
            break;
        }
        // Short receive slices keep the exit check above responsive.
        match rx.recv_timeout(remaining.min(Duration::from_millis(200))) {
            Ok(line) => collected.push(line),
            Err(std::sync::mpsc::RecvTimeoutError::Timeout) => continue,
            // Reader thread finished: nothing more can arrive, so stop
            // instead of spinning until the deadline.
            Err(std::sync::mpsc::RecvTimeoutError::Disconnected) => break,
        }
    }
    let _ = child.kill();
    let _ = child.wait();

    // The process may have exited before the reader thread forwarded its last
    // lines. Once the reader hits EOF the channel disconnects and this loop
    // ends; the cap guards against a stderr pipe that stays open.
    let drain_end = Instant::now() + Duration::from_secs(2);
    loop {
        let remaining = drain_end.saturating_duration_since(Instant::now());
        match rx.recv_timeout(remaining) {
            Ok(line) => collected.push(line),
            Err(_) => break,
        }
    }
    collected.join("\n")
}

impl Drop for DaemonProcess {
    fn drop(&mut self) {
        self.kill();
    }
}

/// Pull the `fields.addr` string out of a structured JSON log line shaped like
/// `{"fields":{"message":"API server listening","addr":"127.0.0.1:PORT"},...}`.
///
/// Returns `None` when the line is not valid JSON or has no string at
/// `fields.addr`.
fn extract_addr(line: &str) -> Option<String> {
    let parsed: serde_json::Value = serde_json::from_str(line).ok()?;
    let addr = parsed.get("fields")?.get("addr")?.as_str()?;
    Some(addr.to_owned())
}

/// Map a wildcard bind address (`0.0.0.0:PORT` or `[::]:PORT`) to the
/// loopback address of the same family, keeping the port. Any other input,
/// including text that does not parse as a socket address, is returned
/// unchanged.
///
/// Connecting to a wildcard works on Linux/macOS (silently routed to
/// loopback) but fails with `WSAEADDRNOTAVAIL` on Windows, which made
/// `public_bind_with_allow_plaintext_starts` flake only on Windows CI
/// before this rewrite.
fn rewrite_wildcard_to_loopback(addr: String) -> String {
    let loopback = match addr.parse::<std::net::SocketAddr>() {
        Ok(sock) if sock.ip().is_unspecified() => {
            let ip = if sock.is_ipv4() {
                std::net::IpAddr::V4(std::net::Ipv4Addr::LOCALHOST)
            } else {
                std::net::IpAddr::V6(std::net::Ipv6Addr::LOCALHOST)
            };
            std::net::SocketAddr::new(ip, sock.port())
        }
        // Concrete or unparseable addresses pass through untouched.
        _ => return addr,
    };
    loopback.to_string()
}

// ---------------------------------------------------------------------------
// HTTP and polling helpers
// ---------------------------------------------------------------------------

/// Issue a GET request to `url` and return `(status, body)`.
///
/// HTTP error statuses (4xx/5xx) are returned like any other response,
/// together with their body, rather than being treated as errors.
///
/// # Panics
///
/// Panics on transport errors or if the body cannot be read as a string.
pub fn http_get(url: &str) -> (u16, String) {
    let config = ureq::Agent::config_builder()
        .http_status_as_error(false)
        .build();
    let agent = ureq::Agent::from(config);

    let response = agent.get(url).call().expect("HTTP GET failed");
    let code = response.status().as_u16();
    let text = response.into_body().read_to_string().unwrap();
    (code, text)
}

/// POST `body` to `url`. Returns (status, body) for any HTTP response code
/// (including 4xx/5xx with their JSON error bodies), matching `http_get`
/// and `http_delete`.
///
/// # Panics
///
/// Panics on transport errors or if the response body cannot be read as a
/// string.
pub fn http_post(url: &str, body: &str) -> (u16, String) {
    // Without this setting ureq reports 4xx/5xx as `Error::StatusCode`, which
    // carries only the code, so the error body would be lost.
    let agent: ureq::Agent = ureq::Agent::config_builder()
        .http_status_as_error(false)
        .build()
        .into();
    let resp = agent
        .post(url)
        .send(body)
        .unwrap_or_else(|e| panic!("HTTP POST failed: {e}"));
    let status = resp.status().as_u16();
    let text = resp.into_body().read_to_string().unwrap();
    (status, text)
}

/// Issue a DELETE request to `url` and return `(status, body)`.
///
/// HTTP error statuses (4xx/5xx) are returned like any other response,
/// together with their body, rather than being treated as errors.
///
/// # Panics
///
/// Panics on transport errors or if the body cannot be read as a string.
pub fn http_delete(url: &str) -> (u16, String) {
    let config = ureq::Agent::config_builder()
        .http_status_as_error(false)
        .build();
    let agent = ureq::Agent::from(config);

    let response = agent.delete(url).call().expect("HTTP DELETE failed");
    let code = response.status().as_u16();
    let text = response.into_body().read_to_string().unwrap();
    (code, text)
}

/// Call `check` repeatedly, 50ms apart, until it yields `Some(value)` or
/// `deadline` has elapsed; returns that value, or `None` on expiry.
///
/// `check` always runs at least once, even with a zero `deadline`. Prefer
/// this over a fixed sleep when waiting for an observable condition.
pub fn poll_until<T>(deadline: Duration, mut check: impl FnMut() -> Option<T>) -> Option<T> {
    let tick = Duration::from_millis(50);
    let cutoff = Instant::now() + deadline;
    loop {
        match check() {
            hit @ Some(_) => return hit,
            // Expiry is only evaluated after a failed check.
            None if Instant::now() >= cutoff => return None,
            None => std::thread::sleep(tick),
        }
    }
}

/// Minimal Sigma rule fixture (YAML): a single `selection` on
/// `CommandLine|contains: "malware"`, logsource `test`/`test`, level `high`.
pub const SIMPLE_RULE: &str = r#"
title: Test Rule
id: 00000000-0000-0000-0000-000000000001
status: test
logsource:
    category: test
    product: test
detection:
    selection:
        CommandLine|contains: "malware"
    condition: selection
level: high
"#;

/// Processing-pipeline fixture (YAML): one `field_name_mapping`
/// transformation that maps `CommandLine` to `process.command_line`.
pub const PIPELINE_YAML: &str = r#"
name: test-pipeline
priority: 10
transformations:
  - type: field_name_mapping
    mapping:
      CommandLine: process.command_line
"#;