mod expectations;
mod report;
use std::fs::File;
use std::io::BufReader;
use std::path::{Path, PathBuf};
use std::process;
use std::time::Instant;
use clap::parser::ValueSource;
use clap::{ArgMatches, Args};
use rsigma_eval::{CorrelationConfig, CorrelationEngine, Engine, EvaluationResult, Pipeline};
use rsigma_parser::SigmaCollection;
#[cfg(feature = "evtx")]
use super::eval_stream::stream_evtx_events;
use super::eval_stream::{CorrelationProcessor, DetectionProcessor, EventProcessor, stream_events};
use crate::commands::reports::BacktestReport;
use crate::config;
use crate::exit_code;
use crate::output::OutputCtx;
use expectations::{ResolvedExpectations, UnexpectedPolicy};
use report::{Accumulator, result_key};
// Command-line arguments for the `backtest` subcommand.
//
// NOTE: plain `//` comments are used in this struct on purpose. clap's derive
// macros turn `///` doc comments into `--help` text, so adding them here would
// change the program's CLI output.
#[derive(Args, Debug)]
pub(crate) struct BacktestArgs {
// `--config PATH`: config file handed to `config::load_and_merge`.
#[arg(long = "config", value_name = "PATH")]
pub config: Option<PathBuf>,
// `--dry-run`: `apply_backtest_config` prints the merged config and exits.
#[arg(long = "dry-run")]
pub dry_run: bool,
// `-r` / `--rules`: rules path. Optional at parse time because it may come
// from the config file; `cmd_backtest` rejects a run where it is still unset.
#[arg(short, long)]
pub rules: Option<PathBuf>,
// `--corpus PATH`: corpus roots; each may be a single file or a directory
// that is walked recursively (see `collect_corpus_files`).
#[arg(long = "corpus", value_name = "PATH")]
pub corpus: Vec<PathBuf>,
// `--expectations PATH`: expectations file resolved against the loaded rules.
#[arg(long = "expectations", value_name = "PATH")]
pub expectations: Option<PathBuf>,
// `--unexpected`: policy name, restricted by clap to fail / warn / ignore.
// Left as `None` when absent so the config file or expectations file can
// supply it (see `resolve_policy`).
#[arg(long = "unexpected", value_parser = ["fail", "warn", "ignore"])]
pub unexpected: Option<String>,
// `-p` / `--pipeline`: pipeline files passed to `crate::load_pipelines`.
#[arg(short = 'p', long = "pipeline")]
pub pipelines: Vec<PathBuf>,
// `--jq`: event filter expression; mutually exclusive with `--jsonpath`.
#[arg(long = "jq", conflicts_with = "jsonpath")]
pub jq: Option<String>,
// `--jsonpath`: event filter expression; mutually exclusive with `--jq`.
#[arg(long = "jsonpath", conflicts_with = "jq")]
pub jsonpath: Option<String>,
// `--input-format` (default "auto"): format used for corpus files whose
// extension is not recognised as NDJSON or EVTX.
#[arg(long = "input-format", default_value = "auto")]
pub input_format: String,
// `--syslog-tz` (default "+00:00"): forwarded unchanged to `stream_events`.
#[arg(long = "syslog-tz", default_value = "+00:00")]
pub syslog_tz: String,
// `--syslog-strip-bom` (default true): takes an explicit value because of
// `ArgAction::Set`; forwarded unchanged to `stream_events`.
#[arg(long = "syslog-strip-bom", default_value_t = true, action = clap::ArgAction::Set)]
pub syslog_strip_bom: bool,
// `--junit PATH`: optional output path passed to `BacktestReport::render`.
#[arg(long = "junit", value_name = "PATH")]
pub junit: Option<PathBuf>,
// `--report PATH`: optional output path passed to `BacktestReport::render`.
#[arg(long = "report", value_name = "PATH")]
pub report: Option<PathBuf>,
}
/// Loads the config file (if any) and layers it underneath the parsed CLI
/// arguments.
///
/// With `--dry-run` the merged configuration is printed instead and the
/// process exits with `exit_code::SUCCESS`; in that case this function never
/// returns.
///
/// * `args` - parsed arguments, updated in place with config-file values.
/// * `matches` - the raw clap matches, used to tell explicit flags from defaults.
pub(crate) fn apply_backtest_config(args: &mut BacktestArgs, matches: &ArgMatches) {
    let merged = config::load_and_merge(args.config.as_deref());

    if !args.dry_run {
        overlay_backtest_config(args, matches, merged);
        return;
    }

    // Dry run: show what would be used, then stop before any work is done.
    config::print_dry_run("backtest", &merged);
    process::exit(exit_code::SUCCESS);
}
/// Copies values from the `backtest` section of the config file into `args`,
/// but only for options the user did not set explicitly.
///
/// An option counts as explicit when clap reports its value source as the
/// command line or an environment variable. `unexpected` has no clap default,
/// so for that field "still `None`" is used as the "not set" test instead.
///
/// Does nothing when the config has no `backtest` section.
fn overlay_backtest_config(
    args: &mut BacktestArgs,
    matches: &ArgMatches,
    base: config::RsigmaConfigPartial,
) {
    /// Stores `file_value` in `slot` unless the user already chose a value.
    fn fill<T>(slot: &mut T, set_by_user: bool, file_value: Option<T>) {
        if let (false, Some(value)) = (set_by_user, file_value) {
            *slot = value;
        }
    }

    let Some(file) = base.backtest else {
        return;
    };

    // Clap argument ids are the struct field names, not the long flag names.
    let set_by_user = |id: &str| {
        matches
            .value_source(id)
            .is_some_and(|src| matches!(src, ValueSource::CommandLine | ValueSource::EnvVariable))
    };

    fill(&mut args.rules, set_by_user("rules"), file.rules.map(Some));
    fill(&mut args.corpus, set_by_user("corpus"), file.corpus);
    fill(
        &mut args.expectations,
        set_by_user("expectations"),
        file.expectations.map(Some),
    );

    let unexpected_already_set = args.unexpected.is_some();
    fill(
        &mut args.unexpected,
        unexpected_already_set,
        file.unexpected.map(Some),
    );

    fill(&mut args.pipelines, set_by_user("pipelines"), file.pipelines);
    fill(
        &mut args.input_format,
        set_by_user("input_format"),
        file.input_format,
    );
    fill(&mut args.syslog_tz, set_by_user("syslog_tz"), file.syslog_tz);
    fill(
        &mut args.syslog_strip_bom,
        set_by_user("syslog_strip_bom"),
        file.syslog_strip_bom,
    );
}
/// Runs the `backtest` subcommand and returns the process exit code.
///
/// Steps, in order:
/// 1. require a rules path and at least one corpus path,
/// 2. load the rules, the pipelines and the event filter,
/// 3. load the expectations file when one was given,
/// 4. resolve the "unexpected" policy,
/// 5. replay the corpus and render the report.
///
/// Missing inputs, an unreadable expectations file, an invalid policy or a
/// corpus discovery failure all print `error: ...` to stderr and return
/// `exit_code::CONFIG_ERROR`. Otherwise the code comes from
/// `BacktestReport::exit_code`.
pub(crate) fn cmd_backtest(args: BacktestArgs, ctx: OutputCtx) -> i32 {
    let rules_path = match args.rules.as_ref() {
        Some(path) => path,
        None => {
            eprintln!("error: no rules path; set --rules or backtest.rules in the config file");
            return exit_code::CONFIG_ERROR;
        }
    };
    if args.corpus.is_empty() {
        eprintln!("error: no corpus; set --corpus or backtest.corpus in the config file");
        return exit_code::CONFIG_ERROR;
    }

    let collection = crate::load_collection(rules_path);
    let pipelines = crate::load_pipelines(&args.pipelines);
    let event_filter = crate::build_event_filter(args.jq.clone(), args.jsonpath.clone());

    // Expectations are optional; only a failure to load a *given* file is an error.
    let loaded = args
        .expectations
        .as_ref()
        .map(|path| expectations::load_and_resolve(path, &collection))
        .transpose();
    let resolved = match loaded {
        Ok(resolved) => resolved,
        Err(e) => {
            eprintln!("error: {e}");
            return exit_code::CONFIG_ERROR;
        }
    };

    let policy = match resolve_policy(args.unexpected.as_deref(), resolved.as_ref()) {
        Ok(policy) => policy,
        Err(e) => {
            eprintln!("error: {e}");
            return exit_code::CONFIG_ERROR;
        }
    };

    let outcome = run(
        &args,
        collection,
        &pipelines,
        &event_filter,
        resolved,
        policy,
    );
    let report = match outcome {
        Ok(report) => report,
        Err(e) => {
            eprintln!("error: {e}");
            return exit_code::CONFIG_ERROR;
        }
    };

    report.render(&ctx, args.report.as_deref(), args.junit.as_deref(), policy);
    report.exit_code(policy)
}
/// Picks the policy applied to unexpected detections.
///
/// Precedence, highest first:
/// 1. `cli_or_config` - the name from `--unexpected` or the config file,
/// 2. the default policy declared in the expectations file, if any,
/// 3. `UnexpectedPolicy::default()`.
///
/// # Errors
/// Returns a message when `cli_or_config` is present but is not a policy name
/// that `UnexpectedPolicy::parse` accepts.
fn resolve_policy(
    cli_or_config: Option<&str>,
    resolved: Option<&ResolvedExpectations>,
) -> Result<UnexpectedPolicy, String> {
    match cli_or_config {
        Some(s) => UnexpectedPolicy::parse(s).ok_or_else(|| {
            format!("invalid unexpected policy '{s}' (expected fail, warn, ignore)")
        }),
        None => {
            let from_file = resolved.and_then(|r| r.file_default_policy);
            Ok(from_file.unwrap_or_else(UnexpectedPolicy::default))
        }
    }
}
/// Replays every corpus file through the rules and builds the final report.
///
/// When the collection contains no correlation rules, a single detection
/// engine is compiled once and shared by all files. Otherwise a fresh
/// correlation engine is built for each file.
/// NOTE(review): rebuilding per file presumably keeps correlation state from
/// leaking between corpus files - confirm that isolation is intended.
///
/// An empty corpus only produces a warning; the report is still built.
/// The measured duration starts before the detection engine is compiled.
///
/// # Errors
/// Propagates the message from `collect_corpus_files` when a corpus path
/// cannot be resolved or a directory cannot be read.
fn run(
    args: &BacktestArgs,
    collection: SigmaCollection,
    pipelines: &[Pipeline],
    event_filter: &crate::EventFilter,
    resolved: Option<ResolvedExpectations>,
    policy: UnexpectedPolicy,
) -> Result<BacktestReport, String> {
    let corpus_files = collect_corpus_files(&args.corpus)?;
    if corpus_files.is_empty() {
        eprintln!("warning: no corpus files found under the given --corpus path(s)");
    }

    let mut acc = Accumulator::new();
    let started = Instant::now();

    // `Some` means detection-only mode; `None` means correlation mode.
    let shared_detection_engine = if collection.correlations.is_empty() {
        Some(build_detection_engine(&collection, pipelines))
    } else {
        None
    };

    for cf in &corpus_files {
        acc.note_file();
        let events = match shared_detection_engine.as_ref() {
            Some(engine) => {
                let mut processor = DetectionProcessor { engine };
                stream_corpus_file(cf, &mut processor, event_filter, args, &mut acc)
            }
            None => {
                let mut engine = build_correlation_engine(&collection, pipelines);
                let mut processor = CorrelationProcessor {
                    engine: &mut engine,
                };
                stream_corpus_file(cf, &mut processor, event_filter, args, &mut acc)
            }
        };
        acc.add_events(events);
    }

    let duration_ms = started.elapsed().as_millis() as u64;
    let report = BacktestReport::build(
        acc,
        &collection,
        resolved.as_ref(),
        policy,
        duration_ms,
    );
    Ok(report)
}
/// Streams one corpus file through `processor`, recording every evaluation
/// result in `acc` under the file's key, and returns the count reported by
/// the underlying streaming function.
///
/// Format selection:
/// * EVTX files go through `stream_evtx_events`; in builds without the
///   `evtx` feature they are skipped with a warning and count as 0.
/// * NDJSON files are always read as `"json"`.
/// * Anything else uses `args.input_format`.
///
/// A file that cannot be opened is reported on stderr and counts as 0; it
/// does not abort the run.
fn stream_corpus_file<P: EventProcessor>(
    cf: &CorpusFile,
    processor: &mut P,
    event_filter: &crate::EventFilter,
    args: &BacktestArgs,
    acc: &mut Accumulator,
) -> u64 {
    let file_key = &cf.key;
    let mut on_result = |hit: &EvaluationResult| {
        let rule_key = result_key(hit).to_string();
        acc.record(&rule_key, file_key);
    };

    // EVTX is handled (or skipped) right here; the text formats fall through.
    let format = match cf.kind {
        CorpusKind::Evtx => {
            #[cfg(feature = "evtx")]
            {
                return stream_evtx_events(&cf.path, event_filter, None, processor, &mut on_result);
            }
            #[cfg(not(feature = "evtx"))]
            {
                eprintln!(
                    "warning: skipping EVTX corpus file {} (built without the evtx feature)",
                    cf.path.display()
                );
                return 0;
            }
        }
        CorpusKind::Ndjson => "json",
        CorpusKind::Other => args.input_format.as_str(),
    };

    let file = match File::open(&cf.path) {
        Ok(handle) => handle,
        Err(e) => {
            eprintln!("Error opening corpus file '{}': {e}", cf.path.display());
            return 0;
        }
    };

    stream_events(
        BufReader::new(file),
        event_filter,
        format,
        &args.syslog_tz,
        args.syslog_strip_bom,
        None,
        processor,
        &mut on_result,
    )
}
/// Builds a detection `Engine` with every pipeline registered and the rule
/// collection added.
///
/// Pipelines are added before the collection. If the collection is rejected,
/// the error is printed and the process exits with `exit_code::RULE_ERROR`.
fn build_detection_engine(collection: &SigmaCollection, pipelines: &[Pipeline]) -> Engine {
    let mut engine = Engine::new();
    pipelines.iter().cloned().for_each(|pipeline| {
        engine.add_pipeline(pipeline);
    });
    engine.add_collection(collection).unwrap_or_else(|e| {
        eprintln!("Error compiling rules: {e}");
        process::exit(exit_code::RULE_ERROR)
    });
    engine
}
/// Builds a `CorrelationEngine` (default `CorrelationConfig`) with every
/// pipeline registered and the rule collection added.
///
/// Pipelines are added before the collection. If the collection is rejected,
/// the error is printed and the process exits with `exit_code::RULE_ERROR`.
fn build_correlation_engine(
    collection: &SigmaCollection,
    pipelines: &[Pipeline],
) -> CorrelationEngine {
    let mut engine = CorrelationEngine::new(CorrelationConfig::default());
    pipelines.iter().cloned().for_each(|pipeline| {
        engine.add_pipeline(pipeline);
    });
    engine.add_collection(collection).unwrap_or_else(|e| {
        eprintln!("Error compiling rules: {e}");
        process::exit(exit_code::RULE_ERROR)
    });
    engine
}
/// How a corpus file is read, decided from its file extension by `classify`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum CorpusKind {
/// `.ndjson` / `.jsonl`: always streamed with the `"json"` input format.
Ndjson,
/// `.evtx`: streamed through the EVTX reader when the `evtx` feature is on.
Evtx,
/// Any other (or no) extension: streamed with the user's `--input-format`.
Other,
}
/// One file discovered under a corpus root.
#[derive(Debug)]
struct CorpusFile {
/// Label under which results for this file are recorded: the file name for
/// a file passed directly, or the `/`-separated path relative to the
/// corpus root for a file found by walking a directory.
key: String,
/// Location used to open the file.
path: PathBuf,
/// Reader selection derived from the extension.
kind: CorpusKind,
}
/// Maps a path to its `CorpusKind` using only the file extension, compared
/// ASCII case-insensitively.
///
/// `ndjson` and `jsonl` give `Ndjson`, `evtx` gives `Evtx`. Everything else -
/// including a missing or non-UTF-8 extension - gives `Other`.
fn classify(path: &Path) -> CorpusKind {
    let Some(ext) = path.extension().and_then(|raw| raw.to_str()) else {
        return CorpusKind::Other;
    };
    let ext_is = |candidate: &str| ext.eq_ignore_ascii_case(candidate);

    if ext_is("ndjson") || ext_is("jsonl") {
        CorpusKind::Ndjson
    } else if ext_is("evtx") {
        CorpusKind::Evtx
    } else {
        CorpusKind::Other
    }
}
/// Expands the corpus roots into the list of files to replay, sorted by key.
///
/// * A root that is a regular file is added directly; its key is the file
///   name (or the whole path if it has no final component).
/// * A root that is a directory is walked recursively by `walk_dir`.
/// * A root that exists but is neither (FIFO, device, ...) is skipped with a
///   warning on stderr instead of being dropped silently.
///
/// Symlinks are followed, as `std::fs::metadata` does.
///
/// # Errors
/// * `corpus path not found: ...` when a root does not exist.
/// * `could not access corpus path ...` when a root cannot be inspected for
///   another reason (for example permission denied), so such failures are no
///   longer misreported as "not found".
/// * Any error produced while walking a directory.
fn collect_corpus_files(roots: &[PathBuf]) -> Result<Vec<CorpusFile>, String> {
    let mut out = Vec::new();
    for root in roots {
        // One metadata lookup replaces separate exists/is_file/is_dir probes
        // and keeps the real I/O error instead of collapsing it to `false`.
        let meta = match std::fs::metadata(root) {
            Ok(meta) => meta,
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
                return Err(format!("corpus path not found: {}", root.display()));
            }
            Err(e) => {
                return Err(format!(
                    "could not access corpus path {}: {e}",
                    root.display()
                ));
            }
        };

        if meta.is_file() {
            let key = root
                .file_name()
                .map(|f| f.to_string_lossy().into_owned())
                .unwrap_or_else(|| root.to_string_lossy().into_owned());
            out.push(CorpusFile {
                key,
                kind: classify(root),
                path: root.clone(),
            });
        } else if meta.is_dir() {
            walk_dir(root, root, &mut out)?;
        } else {
            eprintln!(
                "warning: skipping corpus path {} (not a regular file or directory)",
                root.display()
            );
        }
    }
    // Stable sort: files sharing a key keep the order in which they were found.
    out.sort_by(|a, b| a.key.cmp(&b.key));
    Ok(out)
}
/// Recursively appends every regular file below `dir` to `out`.
///
/// Entries of each directory are visited in sorted path order. A file's key
/// is its path relative to `root` with the platform separator replaced by
/// `/`; if the path is not under `root`, the full path is used instead.
/// Entries that are neither a directory nor a regular file are ignored.
///
/// # Errors
/// Returns a message when a directory, or one of its entries, cannot be read.
fn walk_dir(root: &Path, dir: &Path, out: &mut Vec<CorpusFile>) -> Result<(), String> {
    let listing = std::fs::read_dir(dir)
        .map_err(|e| format!("could not read corpus directory {}: {e}", dir.display()))?;

    // Collecting into `Result` stops at the first unreadable entry.
    let mut children = listing
        .map(|entry| {
            entry.map(|found| found.path()).map_err(|e| {
                format!(
                    "could not read corpus directory entry in {}: {e}",
                    dir.display()
                )
            })
        })
        .collect::<Result<Vec<PathBuf>, String>>()?;
    children.sort();

    for child in children {
        if child.is_dir() {
            walk_dir(root, &child, out)?;
            continue;
        }
        if !child.is_file() {
            continue;
        }
        let key = child
            .strip_prefix(root)
            .unwrap_or(&child)
            .to_string_lossy()
            .replace(std::path::MAIN_SEPARATOR, "/");
        let kind = classify(&child);
        out.push(CorpusFile {
            key,
            kind,
            path: child,
        });
    }
    Ok(())
}
// Unit tests for config overlay precedence, policy resolution and corpus
// discovery.
#[cfg(test)]
mod tests {
use super::*;
use clap::{Command, FromArgMatches};
// Parses `argv` with a bare command built from `BacktestArgs`, returning both
// the typed args and the raw matches that the overlay needs for value sources.
fn parse(argv: &[&str]) -> (BacktestArgs, ArgMatches) {
let cmd = BacktestArgs::augment_args(Command::new("backtest"));
let matches = cmd.get_matches_from(argv);
let args = BacktestArgs::from_arg_matches(&matches).expect("valid args");
(args, matches)
}
// Deserializes a YAML snippet into a partial config, as a config file would be.
fn partial(yaml: &str) -> config::RsigmaConfigPartial {
yaml_serde::from_str(yaml).expect("valid partial")
}
// An explicit `--rules` wins over the file, while an option that was not
// passed (`unexpected`) is still filled from the file.
#[test]
fn cli_flag_beats_config_file() {
let (mut args, matches) = parse(&["backtest", "--rules", "/cli/rules"]);
let base = partial("backtest:\n rules: /file/rules\n unexpected: fail\n");
overlay_backtest_config(&mut args, &matches, base);
assert_eq!(args.rules.as_deref(), Some(Path::new("/cli/rules")));
assert_eq!(args.unexpected.as_deref(), Some("fail"));
}
// With no `--corpus` on the command line, the config file's list is used.
#[test]
fn config_fills_unset_corpus() {
let (mut args, matches) = parse(&["backtest", "--rules", "/r"]);
let base = partial("backtest:\n corpus:\n - /file/corpus\n");
overlay_backtest_config(&mut args, &matches, base);
assert_eq!(args.corpus, vec![PathBuf::from("/file/corpus")]);
}
// An explicit `--unexpected` is not overwritten by the config file.
#[test]
fn cli_unexpected_beats_config() {
let (mut args, matches) = parse(&["backtest", "--rules", "/r", "--unexpected", "ignore"]);
let base = partial("backtest:\n unexpected: fail\n");
overlay_backtest_config(&mut args, &matches, base);
assert_eq!(args.unexpected.as_deref(), Some("ignore"));
}
// Precedence: CLI/config value, then the expectations file default, then the
// built-in default (pinned here as `Warn`).
#[test]
fn policy_precedence_cli_over_file_default() {
let r = ResolvedExpectations {
file_default_policy: Some(UnexpectedPolicy::Fail),
expectations: Vec::new(),
};
assert_eq!(
resolve_policy(Some("ignore"), Some(&r)).unwrap(),
UnexpectedPolicy::Ignore
);
assert_eq!(
resolve_policy(None, Some(&r)).unwrap(),
UnexpectedPolicy::Fail
);
assert_eq!(resolve_policy(None, None).unwrap(), UnexpectedPolicy::Warn);
}
// Extension-based classification, including the no-extension case.
#[test]
fn classify_by_extension() {
assert_eq!(classify(Path::new("a.ndjson")), CorpusKind::Ndjson);
assert_eq!(classify(Path::new("a.jsonl")), CorpusKind::Ndjson);
assert_eq!(classify(Path::new("a.evtx")), CorpusKind::Evtx);
assert_eq!(classify(Path::new("a.log")), CorpusKind::Other);
assert_eq!(classify(Path::new("noext")), CorpusKind::Other);
}
// Directory walks yield keys relative to the root, `/`-separated, in sorted
// order across nested directories.
#[test]
fn corpus_walk_is_sorted_and_relative() {
let dir = tempfile::tempdir().unwrap();
std::fs::write(dir.path().join("b.ndjson"), "{}").unwrap();
std::fs::create_dir(dir.path().join("sub")).unwrap();
std::fs::write(dir.path().join("sub").join("a.ndjson"), "{}").unwrap();
let files = collect_corpus_files(&[dir.path().to_path_buf()]).unwrap();
let keys: Vec<&str> = files.iter().map(|f| f.key.as_str()).collect();
assert_eq!(keys, vec!["b.ndjson", "sub/a.ndjson"]);
}
// A corpus root that does not exist is an error mentioning "not found".
#[test]
fn missing_corpus_path_is_error() {
let err = collect_corpus_files(&[PathBuf::from("/no/such/corpus/path")]).unwrap_err();
assert!(err.contains("not found"), "{err}");
}
}