use std::collections::BTreeSet;
use std::hash::{Hash, Hasher};
use std::path::{Path, PathBuf};
use std::time::SystemTime;
use serde::Deserialize;
use super::normalize_technique;
/// URL of the Atomic Red Team technique index (`atomics/Indexes/index.yaml`) on GitHub.
///
/// NOTE(review): presumably the default `spec` passed to `load_atomics`; the call site
/// is not visible in this file — confirm.
pub(crate) const DEFAULT_ATOMICS_URL: &str = "https://raw.githubusercontent.com/redcanaryco/atomic-red-team/master/atomics/Indexes/index.yaml";
/// URL of the `sigma_attack_nav_coverage.json` file in the SigmaHQ repository on GitHub.
///
/// NOTE(review): presumably the default `spec` passed to `load_baseline`; the call site
/// is not visible in this file — confirm.
pub(crate) const DEFAULT_BASELINE_URL: &str =
"https://raw.githubusercontent.com/SigmaHQ/sigma/master/other/sigma_attack_nav_coverage.json";
/// Set of technique ids obtained from an external cross-reference source.
///
/// Produced by `load_atomics` and `load_baseline`.
pub(crate) struct CrossRef {
/// Ids as returned by `normalize_technique`; the `BTreeSet` keeps them unique and sorted.
pub(crate) ids: BTreeSet<String>,
}
/// Technique ids read from a targets file by `load_targets`.
pub(crate) struct Targets {
/// Normalized ids in order of first appearance, without duplicates.
pub(crate) ids: Vec<String>,
}
/// Age limit, in seconds, below which `is_fresh` considers a cached download current
/// (7 days).
const CACHE_MAX_AGE_SECS: u64 = 7 * 24 * 60 * 60;
/// Resolves `spec` to its text content.
///
/// A `spec` that begins with `http://` or `https://` is downloaded through
/// `fetch_cached`; anything else is treated as a local file path and read from disk.
///
/// # Errors
///
/// Propagates the error from `fetch_cached` for URLs, or returns
/// `could not read <spec>: <io error>` when the local file cannot be read.
fn fetch_or_read(spec: &str) -> Result<String, String> {
    let is_url = ["http://", "https://"]
        .iter()
        .any(|scheme| spec.starts_with(*scheme));
    if !is_url {
        return std::fs::read_to_string(spec).map_err(|e| format!("could not read {spec}: {e}"));
    }
    fetch_cached(spec)
}
/// Computes the on-disk cache location for `url`.
///
/// The file lives under `<user cache dir>/rsigma/coverage/` and is named after the
/// 64-bit hash of the URL (16 lowercase hex digits). The extension is `json` when the
/// URL ends with `.json` and `yaml` otherwise.
///
/// Returns `None` when `dirs::cache_dir()` cannot determine a cache directory.
///
/// NOTE: std does not specify `DefaultHasher`'s algorithm, so the file name is only
/// guaranteed stable for a given standard-library version. A changed name simply
/// results in a cache miss.
fn cache_path(url: &str) -> Option<PathBuf> {
    let hash = {
        let mut state = std::collections::hash_map::DefaultHasher::new();
        url.hash(&mut state);
        state.finish()
    };
    let ext = if url.ends_with(".json") { "json" } else { "yaml" };
    let file_name = format!("{hash:016x}.{ext}");
    dirs::cache_dir().map(|root| root.join("rsigma").join("coverage").join(file_name))
}
/// Reports whether the file at `path` was modified less than `CACHE_MAX_AGE_SECS`
/// seconds ago.
///
/// Returns `false` when the metadata or modification time cannot be read, and also
/// when the modification time lies in the future (`duration_since` fails).
fn is_fresh(path: &Path) -> bool {
    std::fs::metadata(path)
        .and_then(|meta| meta.modified())
        .ok()
        .and_then(|modified| SystemTime::now().duration_since(modified).ok())
        .is_some_and(|age| age.as_secs() < CACHE_MAX_AGE_SECS)
}
fn fetch_cached(url: &str) -> Result<String, String> {
let cache = cache_path(url);
if let Some(path) = &cache
&& is_fresh(path)
&& let Ok(body) = std::fs::read_to_string(path)
{
return Ok(body);
}
match ureq::get(url).call() {
Ok(response) => {
let body = response
.into_body()
.read_to_string()
.map_err(|e| format!("reading response from {url}: {e}"))?;
if let Some(path) = &cache {
if let Some(parent) = path.parent() {
let _ = std::fs::create_dir_all(parent);
}
let _ = std::fs::write(path, &body);
}
Ok(body)
}
Err(e) => {
if let Some(path) = &cache
&& let Ok(body) = std::fs::read_to_string(path)
{
eprintln!("warning: download of {url} failed ({e}); using stale cache");
return Ok(body);
}
Err(format!("downloading {url}: {e}"))
}
}
}
/// Loads the set of technique ids that have Atomic Red Team coverage.
///
/// When `spec` names an existing directory, the directory tree is scanned with
/// `atomic_ids_from_dir`. Otherwise `spec` is resolved with `fetch_or_read` (URL or
/// file path) and parsed as an index by `parse_atomics_index`.
///
/// # Errors
///
/// Propagates the error string of whichever step fails.
pub(crate) fn load_atomics(spec: &str) -> Result<CrossRef, String> {
    let location = Path::new(spec);
    if location.is_dir() {
        return atomic_ids_from_dir(location).map(|ids| CrossRef { ids });
    }
    let raw = fetch_or_read(spec)?;
    parse_atomics_index(&raw).map(|ids| CrossRef { ids })
}
/// Extracts technique ids from an Atomic Red Team index document.
///
/// The YAML is read as a two-level mapping. The outer keys are ignored, and every
/// inner key that `normalize_technique` accepts is collected. The values under the
/// inner keys are skipped without being inspected.
///
/// NOTE(review): because the inner values are ignored, a technique is included even
/// if its `atomic_tests` list is empty — confirm this is intended.
///
/// # Errors
///
/// Returns `parsing Atomic Red Team index: …` when the YAML does not deserialize into
/// the expected two-level mapping.
fn parse_atomics_index(raw: &str) -> Result<BTreeSet<String>, String> {
    use serde::de::IgnoredAny;
    use std::collections::BTreeMap;

    let index: BTreeMap<String, BTreeMap<String, IgnoredAny>> =
        yaml_serde::from_str(raw).map_err(|e| format!("parsing Atomic Red Team index: {e}"))?;
    Ok(index
        .values()
        .flat_map(|techniques| techniques.keys())
        .filter_map(|technique_id| normalize_technique(technique_id))
        .collect())
}
/// Minimal view of one atomic test YAML file, as read by `walk_atomics`.
///
/// Only `attack_technique` is deserialized.
#[derive(Deserialize)]
struct AtomicDoc {
/// Technique id declared inside the file, if the key is present.
attack_technique: Option<String>,
}
/// Collects technique ids from an atomics directory tree rooted at `dir`.
///
/// # Errors
///
/// Propagates any error from `walk_atomics`, and returns
/// `no Atomic Red Team technique files found under <dir>` when the walk succeeds but
/// yields no ids.
fn atomic_ids_from_dir(dir: &Path) -> Result<BTreeSet<String>, String> {
    let mut found = BTreeSet::new();
    walk_atomics(dir, &mut found)?;
    if !found.is_empty() {
        return Ok(found);
    }
    Err(format!(
        "no Atomic Red Team technique files found under {}",
        dir.display()
    ))
}
/// Recursively scans `dir` and adds the technique id of every matching file to `ids`.
///
/// A file matches when its extension is exactly `yaml` and its stem starts with `T`
/// or `t`. The id is taken from the file's `attack_technique` field when the file can
/// be read, parsed and normalized. Otherwise the file stem itself is normalized.
/// Files that yield no id either way are skipped silently.
///
/// # Errors
///
/// Fails when a directory or one of its entries cannot be listed. Problems with
/// individual files are not errors.
fn walk_atomics(dir: &Path, ids: &mut BTreeSet<String>) -> Result<(), String> {
    let listing = std::fs::read_dir(dir)
        .map_err(|e| format!("could not read atomics directory {}: {e}", dir.display()))?;
    for item in listing {
        let path = item
            .map_err(|e| format!("could not read entry in {}: {e}", dir.display()))?
            .path();
        if path.is_dir() {
            walk_atomics(&path, ids)?;
            continue;
        }
        if path.extension().and_then(|ext| ext.to_str()) != Some("yaml") {
            continue;
        }
        let stem = path.file_stem().and_then(|s| s.to_str()).unwrap_or("");
        if !stem.starts_with(['T', 't']) {
            continue;
        }
        // Prefer the id declared inside the file; fall back to the file name.
        let declared = std::fs::read_to_string(&path)
            .ok()
            .and_then(|raw| yaml_serde::from_str::<AtomicDoc>(&raw).ok())
            .and_then(|doc| doc.attack_technique)
            .and_then(|t| normalize_technique(&t));
        ids.extend(declared.or_else(|| normalize_technique(stem)));
    }
    Ok(())
}
/// Subset of a baseline layer JSON document: only the `techniques` array is read.
#[derive(Deserialize)]
struct BaselineLayer {
/// Entries of the `techniques` array; a missing key yields an empty list.
#[serde(default)]
techniques: Vec<BaselineTechnique>,
}
/// One entry of a baseline layer's `techniques` array.
#[derive(Deserialize)]
struct BaselineTechnique {
/// Raw technique id, read from the JSON key `techniqueID` (required).
#[serde(rename = "techniqueID")]
technique_id: String,
/// Optional score; `parse_baseline_layer` treats a missing score as 1.0 and drops
/// entries whose score is `<= 0.0`.
#[serde(default)]
score: Option<f64>,
/// Optional flag; `parse_baseline_layer` drops entries where it is explicitly `false`.
#[serde(default)]
enabled: Option<bool>,
}
/// Loads the set of technique ids covered by a baseline layer.
///
/// `spec` is resolved with `fetch_or_read` (URL or file path) and the resulting JSON
/// is parsed by `parse_baseline_layer`.
///
/// # Errors
///
/// Propagates the error string from fetching/reading or from parsing.
pub(crate) fn load_baseline(spec: &str) -> Result<CrossRef, String> {
    let ids = fetch_or_read(spec).and_then(|raw| parse_baseline_layer(&raw))?;
    Ok(CrossRef { ids })
}
/// Extracts the covered technique ids from a baseline layer JSON document.
///
/// An entry is kept unless `enabled` is explicitly `false` or its `score` is present
/// and `<= 0.0`. Entries without a score are kept. Kept entries whose id
/// `normalize_technique` rejects are dropped silently.
///
/// # Errors
///
/// Returns `parsing baseline layer: …` when the JSON cannot be deserialized.
fn parse_baseline_layer(raw: &str) -> Result<BTreeSet<String>, String> {
    let layer: BaselineLayer =
        serde_json::from_str(raw).map_err(|e| format!("parsing baseline layer: {e}"))?;
    Ok(layer
        .techniques
        .into_iter()
        .filter(|entry| entry.enabled != Some(false))
        // Written as a guard so only a present score that is `<= 0.0` excludes the entry.
        .filter(|entry| !matches!(entry.score, Some(score) if score <= 0.0))
        .filter_map(|entry| normalize_technique(&entry.technique_id))
        .collect())
}
/// Reads a targets file: one technique id per line.
///
/// Everything from the first `#` on a line is a comment. Blank lines (after comment
/// removal and trimming) are ignored. Each remaining entry is passed through
/// `normalize_technique`. Rejected entries produce a warning on stderr and are
/// skipped. Accepted ids are returned in order of first appearance, without duplicates.
///
/// # Errors
///
/// Returns `could not read targets file <path>: …` when the file cannot be read.
pub(crate) fn load_targets(path: &Path) -> Result<Targets, String> {
    let raw = std::fs::read_to_string(path)
        .map_err(|e| format!("could not read targets file {}: {e}", path.display()))?;
    let mut seen = BTreeSet::new();
    let mut ids = Vec::new();
    let entries = raw
        .lines()
        .map(|line| line.split_once('#').map_or(line, |(entry, _comment)| entry).trim())
        .filter(|entry| !entry.is_empty());
    for trimmed in entries {
        let Some(id) = normalize_technique(trimmed) else {
            eprintln!("warning: skipping invalid technique id in targets file: {trimmed}");
            continue;
        };
        // `insert` returns true only the first time an id is seen.
        if seen.insert(id.clone()) {
            ids.push(id);
        }
    }
    Ok(Targets { ids })
}
/// Unit tests for the parsers and loaders in this module.
#[cfg(test)]
mod tests {
    use super::*;

    /// Technique ids are the second-level keys of the index; tactic names are ignored.
    #[test]
    fn parses_atomics_index_inner_keys() {
        // The indentation inside this literal is significant: techniques must be
        // nested under their tactic for the two-level mapping to deserialize. Only
        // the first line relies on the `\` continuation, which strips leading
        // whitespace.
        let raw = "\
execution:
  T1059:
    technique: {}
    atomic_tests: []
  T1059.001:
    technique: {}
defense-evasion:
  T1055:
    technique: {}
";
        let ids = parse_atomics_index(raw).unwrap();
        assert!(ids.contains("T1059"));
        assert!(ids.contains("T1059.001"));
        assert!(ids.contains("T1055"));
        assert_eq!(ids.len(), 3);
    }

    /// Entries with a zero score or `enabled: false` are excluded; a missing score is kept.
    #[test]
    fn parses_baseline_layer_filtering_zero_and_disabled() {
        let raw = r#"{
"techniques": [
{"techniqueID": "T1059", "score": 5},
{"techniqueID": "T1003", "score": 0},
{"techniqueID": "T1055", "enabled": false, "score": 3},
{"techniqueID": "T1078"}
]
}"#;
        let ids = parse_baseline_layer(raw).unwrap();
        assert!(ids.contains("T1059"));
        assert!(ids.contains("T1078"));
        assert!(!ids.contains("T1003"));
        assert!(!ids.contains("T1055"));
    }

    /// Comments and blank lines are dropped, ids are normalized, duplicates and
    /// invalid entries are skipped, and first-seen order is preserved.
    #[test]
    fn targets_strips_comments_and_dedupes() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("targets.txt");
        std::fs::write(
            &path,
            "# top techniques\nT1059\nt1003 # credential dumping\n\nT1059\nnot-a-technique\n",
        )
        .unwrap();
        let targets = load_targets(&path).unwrap();
        assert_eq!(targets.ids, vec!["T1059".to_string(), "T1003".to_string()]);
    }

    /// A spec without an http(s) scheme is read from the local file system.
    #[test]
    fn fetch_or_read_reads_local_file() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("x.yaml");
        std::fs::write(&path, "execution:\n T1059: {}\n").unwrap();
        let body = fetch_or_read(path.to_str().unwrap()).unwrap();
        assert!(body.contains("T1059"));
    }
}