Skip to main content

taudit_core/
custom_rules.rs

1use crate::finding::{
2    Finding, FindingCategory, FindingExtras, FindingSource, Recommendation, Severity,
3};
4use crate::graph::{AuthorityGraph, NodeKind, TrustZone};
5use crate::propagation::PropagationPath;
6use serde::de::{self, MapAccess, Visitor};
7use serde::{Deserialize, Deserializer};
8use std::collections::HashMap;
9use std::fmt;
10use std::fs;
11use std::io;
12use std::path::{Path, PathBuf};
13
14/// A user-defined rule loaded from YAML. Fires when source, sink, and path
15/// predicates all match a propagation path produced by the engine.
16#[derive(Debug, Clone, Deserialize)]
17pub struct CustomRule {
18    pub id: String,
19    pub name: String,
20    #[serde(default)]
21    pub description: String,
22    pub severity: Severity,
23    pub category: FindingCategory,
24    #[serde(rename = "match", default)]
25    pub match_spec: MatchSpec,
26    /// Path of the YAML file this rule was loaded from. Set by
27    /// `load_rules_dir` / `parse_rules_multi_doc_with_source`. Threaded into
28    /// every `Finding` this rule emits (`FindingSource::Custom`) so an
29    /// operator inspecting JSON / SARIF output can distinguish authentic
30    /// built-in findings from any rule that may have been planted in a
31    /// shared `--invariants-dir`. Defaults to `None` for rules constructed
32    /// in tests or in code paths that didn't go through the loader.
33    #[serde(default, skip)]
34    pub source_file: Option<PathBuf>,
35}
36
37#[derive(Debug, Clone, Default, Deserialize)]
38pub struct MatchSpec {
39    #[serde(default)]
40    pub source: NodeMatcher,
41    #[serde(default)]
42    pub sink: NodeMatcher,
43    #[serde(default)]
44    pub path: PathMatcher,
45    /// Graph-level metadata predicate. Applied to `AuthorityGraph::metadata`
46    /// (e.g. `META_TRIGGER`, `META_REPOSITORIES`). When present, ALL conditions
47    /// must hold *in addition to* source/sink/path. Reuses the same typed
48    /// predicate language as node-level metadata (`equals`, `not_equals`,
49    /// `contains`, `in`, plus `not:` negation).
50    #[serde(default)]
51    pub graph_metadata: MetadataMatcher,
52    /// Standalone node predicate. When present, the matcher iterates every
53    /// node in the graph and emits one finding per matching node — the
54    /// source/sink/path fields are ignored, but `graph_metadata:` still
55    /// applies as a graph-wide gate. This is the node-shape-only mode used
56    /// for invariants like "any floating Image" where there is no
57    /// propagation chain to walk.
58    #[serde(default)]
59    pub standalone: Option<NodeMatcher>,
60}
61
62/// A scalar-or-list helper. Lets YAML write `node_type: secret` (single value)
63/// or `node_type: [secret, identity]` (any-of). Single form preserved for
64/// backward compatibility with v0.4.x rule files.
65#[derive(Debug, Clone, Deserialize)]
66#[serde(untagged)]
67pub enum OneOrMany<T> {
68    One(T),
69    Many(Vec<T>),
70}
71
72impl<T: PartialEq> OneOrMany<T> {
73    fn contains(&self, value: &T) -> bool {
74        match self {
75            OneOrMany::One(v) => v == value,
76            OneOrMany::Many(vs) => vs.iter().any(|v| v == value),
77        }
78    }
79}
80
81/// Per-field metadata predicate. Bare string is `equals` (back-compat with
82/// v0.4.x). Operator object supports `equals`, `not_equals`, `contains` (substring
83/// match on string values), and `in` (any-of allowed values).
84#[derive(Debug, Clone, Deserialize)]
85#[serde(untagged)]
86pub enum MetadataPredicate {
87    /// `key: "value"` — equality (back-compat).
88    Equals(String),
89    /// `key: { equals/not_equals/contains/in: ... }`
90    Op(MetadataOp),
91}
92
93#[derive(Debug, Clone, Default, Deserialize)]
94#[serde(deny_unknown_fields)]
95pub struct MetadataOp {
96    #[serde(default)]
97    pub equals: Option<String>,
98    #[serde(default)]
99    pub not_equals: Option<String>,
100    /// Substring match on the string-valued metadata field.
101    #[serde(default)]
102    pub contains: Option<String>,
103    #[serde(default, rename = "in")]
104    pub in_: Option<Vec<String>>,
105}
106
107impl MetadataOp {
108    fn matches(&self, actual: Option<&String>) -> bool {
109        // If the metadata key is absent, only `not_equals` can succeed (against
110        // anything-not-this-value), all positive operators fail.
111        if let Some(want) = &self.equals {
112            if actual.map(|s| s.as_str()) != Some(want.as_str()) {
113                return false;
114            }
115        }
116        if let Some(want) = &self.not_equals {
117            if actual.map(|s| s.as_str()) == Some(want.as_str()) {
118                return false;
119            }
120        }
121        if let Some(needle) = &self.contains {
122            match actual {
123                Some(s) if s.contains(needle.as_str()) => {}
124                _ => return false,
125            }
126        }
127        if let Some(allowed) = &self.in_ {
128            match actual {
129                Some(s) if allowed.iter().any(|a| a == s) => {}
130                _ => return false,
131            }
132        }
133        true
134    }
135}
136
137impl MetadataPredicate {
138    fn matches(&self, actual: Option<&String>) -> bool {
139        match self {
140            MetadataPredicate::Equals(want) => actual.map(|s| s.as_str()) == Some(want.as_str()),
141            MetadataPredicate::Op(op) => op.matches(actual),
142        }
143    }
144}
145
146/// Metadata matcher: map of field -> predicate, with an optional `not`
147/// sub-matcher (negation). The `not:` key is reserved and parsed specially —
148/// it cannot be used as a metadata field name.
149#[derive(Debug, Clone, Default)]
150pub struct MetadataMatcher {
151    pub fields: HashMap<String, MetadataPredicate>,
152    pub not: Option<Box<MetadataMatcher>>,
153}
154
155impl MetadataMatcher {
156    fn matches(&self, metadata: &HashMap<String, String>) -> bool {
157        for (key, pred) in &self.fields {
158            if !pred.matches(metadata.get(key)) {
159                return false;
160            }
161        }
162        if let Some(inner) = &self.not {
163            if inner.matches(metadata) {
164                return false;
165            }
166        }
167        true
168    }
169
170    fn is_empty(&self) -> bool {
171        self.fields.is_empty() && self.not.is_none()
172    }
173}
174
175// Custom Deserialize: pull out reserved `not` key, rest become field predicates.
176impl<'de> Deserialize<'de> for MetadataMatcher {
177    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
178    where
179        D: Deserializer<'de>,
180    {
181        struct MetadataMatcherVisitor;
182
183        impl<'de> Visitor<'de> for MetadataMatcherVisitor {
184            type Value = MetadataMatcher;
185
186            fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
187                f.write_str("a metadata predicate map (field -> string|operator) with optional `not:` sub-map")
188            }
189
190            fn visit_map<M>(self, mut map: M) -> Result<MetadataMatcher, M::Error>
191            where
192                M: MapAccess<'de>,
193            {
194                let mut fields: HashMap<String, MetadataPredicate> = HashMap::new();
195                let mut not: Option<Box<MetadataMatcher>> = None;
196
197                while let Some(key) = map.next_key::<String>()? {
198                    if key == "not" {
199                        if not.is_some() {
200                            return Err(de::Error::duplicate_field("not"));
201                        }
202                        let inner: MetadataMatcher = map.next_value()?;
203                        not = Some(Box::new(inner));
204                    } else {
205                        let value: MetadataPredicate = map.next_value()?;
206                        if fields.insert(key.clone(), value).is_some() {
207                            return Err(de::Error::custom(format!(
208                                "duplicate metadata field `{key}`"
209                            )));
210                        }
211                    }
212                }
213
214                Ok(MetadataMatcher { fields, not })
215            }
216        }
217
218        deserializer.deserialize_map(MetadataMatcherVisitor)
219    }
220}
221
222#[derive(Debug, Clone, Default, Deserialize)]
223pub struct NodeMatcher {
224    /// Single value (`node_type: secret`) or any-of list (`[secret, identity]`).
225    #[serde(default)]
226    pub node_type: Option<OneOrMany<NodeKind>>,
227    /// Single value or any-of list.
228    #[serde(default)]
229    pub trust_zone: Option<OneOrMany<TrustZone>>,
230    #[serde(default)]
231    pub metadata: MetadataMatcher,
232    /// Negation: matches when the inner sub-matcher does NOT match.
233    /// Nested `not` is allowed and double-negation collapses naturally.
234    #[serde(default)]
235    pub not: Option<Box<NodeMatcher>>,
236}
237
238#[derive(Debug, Clone, Default, Deserialize)]
239pub struct PathMatcher {
240    #[serde(default)]
241    pub crosses_to: Vec<TrustZone>,
242}
243
244#[derive(Debug)]
245pub enum CustomRuleError {
246    FileRead(PathBuf, io::Error),
247    YamlParse(PathBuf, serde_yaml::Error),
248    /// A symlink in the rules directory resolved to a path outside the
249    /// declared `--invariants-dir` tree. Refused unless the caller opts in
250    /// via `allow_external_symlinks: true` (CLI flag
251    /// `--invariants-allow-external-symlinks`). See red-team R2 #4.
252    SymlinkOutsideDir {
253        link: PathBuf,
254        target: PathBuf,
255    },
256}
257
258impl fmt::Display for CustomRuleError {
259    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
260        match self {
261            CustomRuleError::FileRead(path, err) => {
262                write!(
263                    f,
264                    "failed to read custom rule file {}: {err}",
265                    path.display()
266                )
267            }
268            CustomRuleError::YamlParse(path, err) => {
269                write!(
270                    f,
271                    "failed to parse custom rule file {}: {err}",
272                    path.display()
273                )
274            }
275            CustomRuleError::SymlinkOutsideDir { link, target } => {
276                write!(
277                    f,
278                    "refusing to follow symlink {} → {} (target outside --invariants-dir; potential symlink traversal). Use --invariants-allow-external-symlinks to override.",
279                    link.display(),
280                    target.display()
281                )
282            }
283        }
284    }
285}
286
287impl std::error::Error for CustomRuleError {
288    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
289        match self {
290            CustomRuleError::FileRead(_, err) => Some(err),
291            CustomRuleError::YamlParse(_, err) => Some(err),
292            CustomRuleError::SymlinkOutsideDir { .. } => None,
293        }
294    }
295}
296
297/// Load all `*.yml` and `*.yaml` files from `dir`. Files are read in sorted
298/// order for deterministic output. Returns a list of all errors alongside
299/// successfully parsed rules — callers decide whether to fail fast or continue.
300///
301/// Symlinks pointing OUTSIDE `dir` are refused by default (red-team R2 #4).
302/// Use [`load_rules_dir_with_opts`] to opt into the legacy follow-everything
303/// behavior.
304pub fn load_rules_dir(dir: &Path) -> Result<Vec<CustomRule>, Vec<CustomRuleError>> {
305    load_rules_dir_with_opts(dir, false)
306}
307
308/// Like [`load_rules_dir`] but lets the caller decide what to do with
309/// symlinks that escape the declared directory.
310///
311/// - In-tree symlinks (canonicalized target lives under canonicalized `dir`)
312///   are always followed; a stderr warning is emitted naming the link and
313///   target so the user is never surprised.
314/// - Out-of-tree symlinks are:
315///   - REFUSED with a `CustomRuleError::SymlinkOutsideDir` when
316///     `allow_external_symlinks` is `false` (default — safe).
317///   - Followed, with a louder stderr warning, when
318///     `allow_external_symlinks` is `true` (caller opted in via
319///     `--invariants-allow-external-symlinks`).
320///
321/// Why: the loader walks `--invariants-dir` recursively and previously
322/// followed every symlink without checking. A symlink under the directory
323/// pointing OUT (e.g. to `/etc/passwd` or an attacker-controlled file)
324/// was silently read in. This function makes that escape opt-in.
325pub fn load_rules_dir_with_opts(
326    dir: &Path,
327    allow_external_symlinks: bool,
328) -> Result<Vec<CustomRule>, Vec<CustomRuleError>> {
329    let mut entries: Vec<PathBuf> = Vec::new();
330    let read_dir = match fs::read_dir(dir) {
331        Ok(rd) => rd,
332        Err(err) => return Err(vec![CustomRuleError::FileRead(dir.to_path_buf(), err)]),
333    };
334
335    // Canonicalize the directory once so we can compare every symlink target
336    // against the same normalized prefix. If canonicalization fails (e.g. a
337    // broken symlink in the path), fall back to the literal path — better to
338    // be conservative and treat *every* symlink as out-of-tree than to crash.
339    let canonical_dir = fs::canonicalize(dir).unwrap_or_else(|_| dir.to_path_buf());
340
341    let mut errors: Vec<CustomRuleError> = Vec::new();
342
343    for entry in read_dir.flatten() {
344        let path = entry.path();
345
346        // is_symlink uses symlink_metadata under the hood — does not follow.
347        let is_symlink = entry
348            .file_type()
349            .map(|ft| ft.is_symlink())
350            .unwrap_or_else(|_| path.is_symlink());
351
352        if is_symlink {
353            // Resolve the symlink target. canonicalize() follows the chain.
354            // A broken symlink shows up as Err here — treat it like any other
355            // file-read error.
356            let canonical_target = match fs::canonicalize(&path) {
357                Ok(t) => t,
358                Err(err) => {
359                    errors.push(CustomRuleError::FileRead(path.clone(), err));
360                    continue;
361                }
362            };
363
364            let in_tree = canonical_target.starts_with(&canonical_dir);
365
366            if !in_tree {
367                if allow_external_symlinks {
368                    eprintln!(
369                        "WARNING: following external symlink {} → {} (allowed by --invariants-allow-external-symlinks)",
370                        path.display(),
371                        canonical_target.display()
372                    );
373                } else {
374                    errors.push(CustomRuleError::SymlinkOutsideDir {
375                        link: path,
376                        target: canonical_target,
377                    });
378                    continue;
379                }
380            } else {
381                eprintln!(
382                    "WARNING: following symlink {} → {}",
383                    path.display(),
384                    canonical_target.display()
385                );
386            }
387        }
388
389        // Use is_file (which follows symlinks) so the surviving symlinks-
390        // pointing-at-files behave the same as a regular file.
391        if !path.is_file() {
392            continue;
393        }
394        match path.extension().and_then(|e| e.to_str()) {
395            Some("yml") | Some("yaml") => entries.push(path),
396            _ => {}
397        }
398    }
399    entries.sort();
400
401    let mut rules = Vec::new();
402    for path in entries {
403        match fs::read_to_string(&path) {
404            Ok(content) => match parse_rules_multi_doc_with_source(&content, Some(&path)) {
405                Ok(mut parsed) => rules.append(&mut parsed),
406                Err(err) => errors.push(CustomRuleError::YamlParse(path, err)),
407            },
408            Err(err) => errors.push(CustomRuleError::FileRead(path, err)),
409        }
410    }
411
412    if errors.is_empty() {
413        Ok(rules)
414    } else {
415        Err(errors)
416    }
417}
418
419/// Parse a YAML string containing one or more `CustomRule` documents (separated
420/// by `---`). Single-doc files behave identically to the legacy
421/// `serde_yaml::from_str::<CustomRule>` path. Empty/whitespace-only documents
422/// (e.g. a leading `---` followed by a real doc) are skipped.
423///
424/// Equivalent to `parse_rules_multi_doc_with_source(content, None)` — provenance
425/// stamping is opt-in via the `_with_source` variant so callers that don't
426/// know the originating path (tests, stdin) keep working unchanged.
427pub fn parse_rules_multi_doc(content: &str) -> Result<Vec<CustomRule>, serde_yaml::Error> {
428    parse_rules_multi_doc_with_source(content, None)
429}
430
431/// Parse one or more `CustomRule` documents from `content` and stamp every
432/// produced rule with `source_file = source` so downstream finding emission
433/// can attribute authority back to the originating YAML file. Used by
434/// `load_rules_dir` to thread file paths through into `FindingSource::Custom`.
435pub fn parse_rules_multi_doc_with_source(
436    content: &str,
437    source: Option<&Path>,
438) -> Result<Vec<CustomRule>, serde_yaml::Error> {
439    let mut rules = Vec::new();
440    for doc in serde_yaml::Deserializer::from_str(content) {
441        // An empty document deserializes as `Value::Null`; skip those so a
442        // leading `---` or trailing separator doesn't break the load.
443        let value = serde_yaml::Value::deserialize(doc)?;
444        if value.is_null() {
445            continue;
446        }
447        let mut rule: CustomRule = serde_yaml::from_value(value)?;
448        rule.source_file = source.map(|p| p.to_path_buf());
449        rules.push(rule);
450    }
451    Ok(rules)
452}
453
454impl NodeMatcher {
455    fn matches(&self, node: &crate::graph::Node) -> bool {
456        if let Some(kinds) = &self.node_type {
457            if !kinds.contains(&node.kind) {
458                return false;
459            }
460        }
461        if let Some(zones) = &self.trust_zone {
462            if !zones.contains(&node.trust_zone) {
463                return false;
464            }
465        }
466        if !self.metadata.matches(&node.metadata) {
467            return false;
468        }
469        if let Some(inner) = &self.not {
470            if inner.matches(node) {
471                return false;
472            }
473        }
474        true
475    }
476
477    /// True when the matcher has no constraints — used by tests/tooling.
478    #[allow(dead_code)]
479    fn is_wildcard(&self) -> bool {
480        self.node_type.is_none()
481            && self.trust_zone.is_none()
482            && self.metadata.is_empty()
483            && self.not.is_none()
484    }
485}
486
487impl PathMatcher {
488    fn matches(&self, path: &PropagationPath) -> bool {
489        if self.crosses_to.is_empty() {
490            return true;
491        }
492        match path.boundary_crossing {
493            Some((_, to_zone)) => self.crosses_to.contains(&to_zone),
494            None => false,
495        }
496    }
497}
498
499/// Evaluate every (rule, path) pair. A finding is produced when the rule's
500/// source, sink, and path predicates all match. Findings carry the rule id in
501/// the message so operators can trace back to the originating YAML.
502pub fn evaluate_custom_rules(
503    graph: &AuthorityGraph,
504    paths: &[PropagationPath],
505    rules: &[CustomRule],
506) -> Vec<Finding> {
507    let mut findings = Vec::new();
508
509    for rule in rules {
510        // Standalone (node-shape-only) mode: when `standalone:` is present,
511        // walk every node in the graph and emit one finding per match. The
512        // source/sink/path fields are ignored, but `graph_metadata:` still
513        // gates whether the rule runs at all — that's how PR-context
514        // assertions on node shape work.
515        if let Some(matcher) = &rule.match_spec.standalone {
516            if !rule.match_spec.graph_metadata.matches(&graph.metadata) {
517                continue;
518            }
519            for node in &graph.nodes {
520                if !matcher.matches(node) {
521                    continue;
522                }
523                findings.push(Finding {
524                    severity: rule.severity,
525                    category: rule.category,
526                    nodes_involved: vec![node.id],
527                    message: format!("[{}] {}: {}", rule.id, rule.name, node.name),
528                    recommendation: Recommendation::Manual {
529                        action: if rule.description.is_empty() {
530                            format!("Review custom rule '{}'", rule.id)
531                        } else {
532                            rule.description.clone()
533                        },
534                    },
535                    path: None,
536                    source: custom_source(rule),
537                    extras: FindingExtras::default(),
538                });
539            }
540            continue;
541        }
542
543        // Graph-level metadata gate: if the predicate doesn't hold against
544        // `graph.metadata`, no path in this graph can match this rule. Skip
545        // the path loop entirely. An empty `graph_metadata:` (the common case
546        // for rules that don't care about graph-level state) trivially matches.
547        if !rule.match_spec.graph_metadata.matches(&graph.metadata) {
548            continue;
549        }
550
551        for path in paths {
552            let source_node = match graph.node(path.source) {
553                Some(n) => n,
554                None => continue,
555            };
556            let sink_node = match graph.node(path.sink) {
557                Some(n) => n,
558                None => continue,
559            };
560
561            if !rule.match_spec.source.matches(source_node) {
562                continue;
563            }
564            if !rule.match_spec.sink.matches(sink_node) {
565                continue;
566            }
567            if !rule.match_spec.path.matches(path) {
568                continue;
569            }
570
571            findings.push(Finding {
572                severity: rule.severity,
573                category: rule.category,
574                nodes_involved: vec![path.source, path.sink],
575                message: format!(
576                    "[{}] {}: {} -> {}",
577                    rule.id, rule.name, source_node.name, sink_node.name
578                ),
579                recommendation: Recommendation::Manual {
580                    action: if rule.description.is_empty() {
581                        format!("Review custom rule '{}'", rule.id)
582                    } else {
583                        rule.description.clone()
584                    },
585                },
586                path: Some(path.clone()),
587                source: custom_source(rule),
588                extras: FindingExtras::default(),
589            });
590        }
591    }
592
593    findings
594}
595
596/// Build a `FindingSource::Custom` from the rule's tracked YAML path. Falls
597/// back to an empty path when the rule was constructed in-memory (test,
598/// stdin) and never carried provenance — those callers already know the
599/// finding is custom; the empty path just makes that obvious.
600fn custom_source(rule: &CustomRule) -> FindingSource {
601    FindingSource::Custom {
602        source_file: rule.source_file.clone().unwrap_or_default(),
603    }
604}
605
606#[cfg(test)]
607mod tests {
608    use super::*;
609    use crate::graph::{AuthorityGraph, EdgeKind, PipelineSource};
610    use crate::propagation::{propagation_analysis, DEFAULT_MAX_HOPS};
611
612    fn source() -> PipelineSource {
613        PipelineSource {
614            file: "test.yml".into(),
615            repo: None,
616            git_ref: None,
617            commit_sha: None,
618        }
619    }
620
621    fn build_graph_with_paths() -> (AuthorityGraph, Vec<PropagationPath>) {
622        let mut g = AuthorityGraph::new(source());
623        let secret = g.add_node(NodeKind::Secret, "API_KEY", TrustZone::FirstParty);
624        let trusted = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
625        let untrusted = g.add_node(NodeKind::Step, "third-party", TrustZone::Untrusted);
626
627        g.add_edge(trusted, secret, EdgeKind::HasAccessTo);
628        g.add_edge(trusted, untrusted, EdgeKind::DelegatesTo);
629
630        let paths = propagation_analysis(&g, DEFAULT_MAX_HOPS);
631        (g, paths)
632    }
633
634    fn one<T>(v: T) -> Option<OneOrMany<T>> {
635        Some(OneOrMany::One(v))
636    }
637
638    #[test]
639    fn custom_rule_fires_on_matching_path() {
640        let (graph, paths) = build_graph_with_paths();
641
642        let rule = CustomRule {
643            id: "secret_to_untrusted".into(),
644            name: "Secret reaching untrusted step".into(),
645            description: "Custom policy".into(),
646            severity: Severity::Critical,
647            category: FindingCategory::AuthorityPropagation,
648            match_spec: MatchSpec {
649                source: NodeMatcher {
650                    node_type: None,
651                    trust_zone: one(TrustZone::FirstParty),
652                    metadata: MetadataMatcher::default(),
653                    not: None,
654                },
655                sink: NodeMatcher {
656                    node_type: None,
657                    trust_zone: one(TrustZone::Untrusted),
658                    metadata: MetadataMatcher::default(),
659                    not: None,
660                },
661                path: PathMatcher::default(),
662                graph_metadata: MetadataMatcher::default(),
663                standalone: None,
664            },
665            source_file: None,
666        };
667
668        let findings = evaluate_custom_rules(&graph, &paths, &[rule]);
669        assert_eq!(findings.len(), 1);
670        assert_eq!(findings[0].severity, Severity::Critical);
671        assert!(findings[0].message.contains("secret_to_untrusted"));
672    }
673
674    #[test]
675    fn custom_rule_does_not_fire_when_predicates_miss() {
676        let (graph, paths) = build_graph_with_paths();
677
678        let rule = CustomRule {
679            id: "miss".into(),
680            name: "Untrusted source".into(),
681            description: String::new(),
682            severity: Severity::Critical,
683            category: FindingCategory::AuthorityPropagation,
684            match_spec: MatchSpec {
685                source: NodeMatcher {
686                    node_type: None,
687                    trust_zone: one(TrustZone::Untrusted),
688                    metadata: MetadataMatcher::default(),
689                    not: None,
690                },
691                sink: NodeMatcher::default(),
692                path: PathMatcher::default(),
693                graph_metadata: MetadataMatcher::default(),
694                standalone: None,
695            },
696            source_file: None,
697        };
698
699        let findings = evaluate_custom_rules(&graph, &paths, &[rule]);
700        assert!(findings.is_empty());
701    }
702
703    #[test]
704    fn yaml_round_trip_loads_full_rule() {
705        let yaml = r#"
706id: my_secret_to_untrusted
707name: Secret reaching untrusted step
708description: "Custom policy: secrets must not reach untrusted steps"
709severity: critical
710category: authority_propagation
711match:
712  source:
713    node_type: secret
714    trust_zone: first_party
715  sink:
716    node_type: step
717    trust_zone: untrusted
718  path:
719    crosses_to: [untrusted]
720"#;
721        let rule: CustomRule = serde_yaml::from_str(yaml).expect("yaml must parse");
722        assert_eq!(rule.id, "my_secret_to_untrusted");
723        assert_eq!(rule.severity, Severity::Critical);
724        assert!(matches!(
725            rule.match_spec.source.node_type,
726            Some(OneOrMany::One(NodeKind::Secret))
727        ));
728        assert!(matches!(
729            rule.match_spec.sink.trust_zone,
730            Some(OneOrMany::One(TrustZone::Untrusted))
731        ));
732        assert_eq!(rule.match_spec.path.crosses_to, vec![TrustZone::Untrusted]);
733    }
734
735    #[test]
736    fn metadata_predicate_must_match_all_keys() {
737        let mut g = AuthorityGraph::new(source());
738        let mut meta = HashMap::new();
739        meta.insert("kind".to_string(), "deploy".to_string());
740        let secret =
741            g.add_node_with_metadata(NodeKind::Secret, "TOKEN", TrustZone::FirstParty, meta);
742        let sink = g.add_node(NodeKind::Step, "remote", TrustZone::Untrusted);
743        let step = g.add_node(NodeKind::Step, "use", TrustZone::FirstParty);
744        g.add_edge(step, secret, EdgeKind::HasAccessTo);
745        g.add_edge(step, sink, EdgeKind::DelegatesTo);
746
747        let paths = propagation_analysis(&g, DEFAULT_MAX_HOPS);
748
749        let mut want_fields = HashMap::new();
750        want_fields.insert(
751            "kind".to_string(),
752            MetadataPredicate::Equals("deploy".to_string()),
753        );
754        let hit = CustomRule {
755            id: "hit".into(),
756            name: "n".into(),
757            description: String::new(),
758            severity: Severity::High,
759            category: FindingCategory::AuthorityPropagation,
760            match_spec: MatchSpec {
761                source: NodeMatcher {
762                    node_type: one(NodeKind::Secret),
763                    trust_zone: None,
764                    metadata: MetadataMatcher {
765                        fields: want_fields,
766                        not: None,
767                    },
768                    not: None,
769                },
770                sink: NodeMatcher::default(),
771                path: PathMatcher::default(),
772                graph_metadata: MetadataMatcher::default(),
773                standalone: None,
774            },
775            source_file: None,
776        };
777        assert_eq!(evaluate_custom_rules(&g, &paths, &[hit]).len(), 1);
778
779        let mut wrong_fields = HashMap::new();
780        wrong_fields.insert(
781            "kind".to_string(),
782            MetadataPredicate::Equals("build".to_string()),
783        );
784        let miss = CustomRule {
785            id: "miss".into(),
786            name: "n".into(),
787            description: String::new(),
788            severity: Severity::High,
789            category: FindingCategory::AuthorityPropagation,
790            match_spec: MatchSpec {
791                source: NodeMatcher {
792                    node_type: one(NodeKind::Secret),
793                    trust_zone: None,
794                    metadata: MetadataMatcher {
795                        fields: wrong_fields,
796                        not: None,
797                    },
798                    not: None,
799                },
800                sink: NodeMatcher::default(),
801                path: PathMatcher::default(),
802                graph_metadata: MetadataMatcher::default(),
803                standalone: None,
804            },
805            source_file: None,
806        };
807        assert!(evaluate_custom_rules(&g, &paths, &[miss]).is_empty());
808    }
809
810    #[test]
811    fn load_rules_dir_reads_yml_and_yaml() {
812        let tmp = std::env::temp_dir().join(format!("taudit-custom-rules-{}", std::process::id()));
813        fs::create_dir_all(&tmp).unwrap();
814        let yml_path = tmp.join("a.yml");
815        let yaml_path = tmp.join("b.yaml");
816        let other_path = tmp.join("c.txt");
817
818        fs::write(
819            &yml_path,
820            "id: a\nname: a\nseverity: high\ncategory: authority_propagation\n",
821        )
822        .unwrap();
823        fs::write(
824            &yaml_path,
825            "id: b\nname: b\nseverity: medium\ncategory: unpinned_action\n",
826        )
827        .unwrap();
828        fs::write(&other_path, "ignored").unwrap();
829
830        let rules = load_rules_dir(&tmp).expect("load must succeed");
831        assert_eq!(rules.len(), 2);
832        assert_eq!(rules[0].id, "a");
833        assert_eq!(rules[1].id, "b");
834
835        // cleanup
836        let _ = fs::remove_dir_all(&tmp);
837    }
838
839    #[test]
840    fn load_rules_dir_reports_yaml_errors_with_path() {
841        let tmp =
842            std::env::temp_dir().join(format!("taudit-custom-rules-bad-{}", std::process::id()));
843        fs::create_dir_all(&tmp).unwrap();
844        let bad = tmp.join("bad.yml");
845        fs::write(&bad, "id: x\nseverity: not-a-real-severity\n").unwrap();
846
847        let errs = load_rules_dir(&tmp).expect_err("should fail");
848        assert_eq!(errs.len(), 1);
849        let msg = errs[0].to_string();
850        assert!(msg.contains("bad.yml"), "error must mention path: {msg}");
851
852        let _ = fs::remove_dir_all(&tmp);
853    }
854
855    // ── v0.6 grammar additions: negation + typed metadata predicates ─────
856
857    /// Build a graph with one secret in first_party reaching one untrusted
858    /// step. Used by the new grammar tests.
859    fn simple_first_to_untrusted_graph() -> (AuthorityGraph, Vec<PropagationPath>) {
860        let mut g = AuthorityGraph::new(source());
861        let mut meta = HashMap::new();
862        meta.insert("oidc".to_string(), "true".to_string());
863        meta.insert("permissions".to_string(), "contents: write".to_string());
864        meta.insert("role".to_string(), "admin".to_string());
865        let secret =
866            g.add_node_with_metadata(NodeKind::Identity, "GH_TOKEN", TrustZone::FirstParty, meta);
867        let step = g.add_node(NodeKind::Step, "use-it", TrustZone::FirstParty);
868        let untrusted = g.add_node(NodeKind::Step, "third-party", TrustZone::Untrusted);
869        g.add_edge(step, secret, EdgeKind::HasAccessTo);
870        g.add_edge(step, untrusted, EdgeKind::DelegatesTo);
871        let paths = propagation_analysis(&g, DEFAULT_MAX_HOPS);
872        (g, paths)
873    }
874
875    #[test]
876    fn negation_on_trust_zone_inverts_match() {
877        let (graph, paths) = simple_first_to_untrusted_graph();
878        // sink is untrusted; "not untrusted" must NOT match the sink → no findings
879        let yaml = r#"
880id: r
881name: r
882severity: high
883category: authority_propagation
884match:
885  sink:
886    not:
887      trust_zone: untrusted
888"#;
889        let rule: CustomRule = serde_yaml::from_str(yaml).expect("yaml parses");
890        assert!(evaluate_custom_rules(&graph, &paths, &[rule]).is_empty());
891    }
892
893    #[test]
894    fn negation_on_node_type_list_matches_other_kinds() {
895        let (graph, paths) = simple_first_to_untrusted_graph();
896        // source kinds in fixtures: identity. "not [secret, identity]" excludes it
897        // → source predicate fails → no findings.
898        let yaml = r#"
899id: r
900name: r
901severity: high
902category: authority_propagation
903match:
904  source:
905    not:
906      node_type: [secret, identity]
907"#;
908        let rule: CustomRule = serde_yaml::from_str(yaml).expect("yaml parses");
909        assert!(evaluate_custom_rules(&graph, &paths, &[rule]).is_empty());
910
911        // Inverse: "not [step]" — source is identity, so the inner does NOT match,
912        // therefore the not-wrapper matches → at least one finding fires.
913        let yaml2 = r#"
914id: r2
915name: r2
916severity: high
917category: authority_propagation
918match:
919  source:
920    not:
921      node_type: [step]
922"#;
923        let rule2: CustomRule = serde_yaml::from_str(yaml2).expect("yaml parses");
924        assert!(!evaluate_custom_rules(&graph, &paths, &[rule2]).is_empty());
925    }
926
927    #[test]
928    fn metadata_negation_matches_absent_or_other_value() {
929        let (graph, paths) = simple_first_to_untrusted_graph();
930        // The identity has oidc=true. `not: { oidc: "true" }` excludes it →
931        // no finding when applied to the source.
932        let yaml = r#"
933id: r
934name: r
935severity: high
936category: authority_propagation
937match:
938  source:
939    metadata:
940      not:
941        oidc: "true"
942"#;
943        let rule: CustomRule = serde_yaml::from_str(yaml).expect("yaml parses");
944        assert!(evaluate_custom_rules(&graph, &paths, &[rule]).is_empty());
945    }
946
947    #[test]
948    fn metadata_contains_does_substring_match() {
949        let (graph, paths) = simple_first_to_untrusted_graph();
950        let yaml = r#"
951id: r
952name: r
953severity: high
954category: authority_propagation
955match:
956  source:
957    metadata:
958      permissions:
959        contains: "contents: write"
960"#;
961        let rule: CustomRule = serde_yaml::from_str(yaml).expect("yaml parses");
962        assert_eq!(evaluate_custom_rules(&graph, &paths, &[rule]).len(), 1);
963
964        // negative case: substring not present
965        let yaml_miss = r#"
966id: r
967name: r
968severity: high
969category: authority_propagation
970match:
971  source:
972    metadata:
973      permissions:
974        contains: "actions: write"
975"#;
976        let rule_miss: CustomRule = serde_yaml::from_str(yaml_miss).expect("yaml parses");
977        assert!(evaluate_custom_rules(&graph, &paths, &[rule_miss]).is_empty());
978    }
979
980    #[test]
981    fn metadata_in_matches_any_of_allowed_values() {
982        let (graph, paths) = simple_first_to_untrusted_graph();
983        let yaml = r#"
984id: r
985name: r
986severity: high
987category: authority_propagation
988match:
989  source:
990    metadata:
991      role:
992        in: [admin, owner, write]
993"#;
994        let rule: CustomRule = serde_yaml::from_str(yaml).expect("yaml parses");
995        assert_eq!(evaluate_custom_rules(&graph, &paths, &[rule]).len(), 1);
996
997        let yaml_miss = r#"
998id: r
999name: r
1000severity: high
1001category: authority_propagation
1002match:
1003  source:
1004    metadata:
1005      role:
1006        in: [reader, none]
1007"#;
1008        let rule_miss: CustomRule = serde_yaml::from_str(yaml_miss).expect("yaml parses");
1009        assert!(evaluate_custom_rules(&graph, &paths, &[rule_miss]).is_empty());
1010    }
1011
1012    #[test]
1013    fn metadata_not_equals_excludes_specific_value() {
1014        let (graph, paths) = simple_first_to_untrusted_graph();
1015        let yaml = r#"
1016id: r
1017name: r
1018severity: high
1019category: authority_propagation
1020match:
1021  source:
1022    metadata:
1023      role:
1024        not_equals: admin
1025"#;
1026        let rule: CustomRule = serde_yaml::from_str(yaml).expect("yaml parses");
1027        // role=admin → not_equals fails → no findings
1028        assert!(evaluate_custom_rules(&graph, &paths, &[rule]).is_empty());
1029
1030        let yaml_hit = r#"
1031id: r
1032name: r
1033severity: high
1034category: authority_propagation
1035match:
1036  source:
1037    metadata:
1038      role:
1039        not_equals: reader
1040"#;
1041        let rule_hit: CustomRule = serde_yaml::from_str(yaml_hit).expect("yaml parses");
1042        assert_eq!(evaluate_custom_rules(&graph, &paths, &[rule_hit]).len(), 1);
1043    }
1044
1045    #[test]
1046    fn nested_not_collapses_to_inner_condition() {
1047        let (graph, paths) = simple_first_to_untrusted_graph();
1048        // not(not(trust_zone=first_party)) ≡ trust_zone=first_party.
1049        // The source is first_party so this should fire.
1050        let yaml = r#"
1051id: r
1052name: r
1053severity: high
1054category: authority_propagation
1055match:
1056  source:
1057    not:
1058      not:
1059        trust_zone: first_party
1060"#;
1061        let rule: CustomRule = serde_yaml::from_str(yaml).expect("yaml parses");
1062        assert!(!evaluate_custom_rules(&graph, &paths, &[rule]).is_empty());
1063    }
1064
1065    #[test]
1066    fn node_type_accepts_single_value_back_compat() {
1067        // The original v0.4 simple form must still parse and behave identically.
1068        let yaml = r#"
1069id: r
1070name: r
1071severity: high
1072category: authority_propagation
1073match:
1074  source:
1075    node_type: identity
1076    trust_zone: first_party
1077    metadata:
1078      oidc: "true"
1079"#;
1080        let rule: CustomRule = serde_yaml::from_str(yaml).expect("v0.4 form must still parse");
1081        assert!(matches!(
1082            rule.match_spec.source.node_type,
1083            Some(OneOrMany::One(NodeKind::Identity))
1084        ));
1085        assert!(matches!(
1086            rule.match_spec.source.trust_zone,
1087            Some(OneOrMany::One(TrustZone::FirstParty))
1088        ));
1089        let pred = rule
1090            .match_spec
1091            .source
1092            .metadata
1093            .fields
1094            .get("oidc")
1095            .expect("oidc predicate");
1096        assert!(matches!(pred, MetadataPredicate::Equals(v) if v == "true"));
1097
1098        let (graph, paths) = simple_first_to_untrusted_graph();
1099        assert_eq!(evaluate_custom_rules(&graph, &paths, &[rule]).len(), 1);
1100    }
1101
1102    #[test]
1103    fn node_type_accepts_list_form() {
1104        let yaml = r#"
1105id: r
1106name: r
1107severity: high
1108category: authority_propagation
1109match:
1110  source:
1111    node_type: [secret, identity]
1112    trust_zone: [first_party, third_party]
1113"#;
1114        let rule: CustomRule = serde_yaml::from_str(yaml).expect("list form must parse");
1115        match &rule.match_spec.source.node_type {
1116            Some(OneOrMany::Many(v)) => {
1117                assert_eq!(v, &vec![NodeKind::Secret, NodeKind::Identity]);
1118            }
1119            other => panic!("expected list form, got {other:?}"),
1120        }
1121        let (graph, paths) = simple_first_to_untrusted_graph();
1122        assert_eq!(evaluate_custom_rules(&graph, &paths, &[rule]).len(), 1);
1123    }
1124
1125    // ── Gap B: graph-level metadata predicates ──────────────
1126
1127    /// Builds a graph with one PR-context source/sink path and lets tests set
1128    /// graph-level metadata to pressure-test the new predicate.
1129    fn pr_context_graph_with_meta(meta: &[(&str, &str)]) -> (AuthorityGraph, Vec<PropagationPath>) {
1130        let mut g = AuthorityGraph::new(source());
1131        let mut secret_meta = HashMap::new();
1132        secret_meta.insert("variable_group".to_string(), "true".to_string());
1133        let secret = g.add_node_with_metadata(
1134            NodeKind::Secret,
1135            "VG_SECRET",
1136            TrustZone::FirstParty,
1137            secret_meta,
1138        );
1139        let step = g.add_node(NodeKind::Step, "use", TrustZone::FirstParty);
1140        let untrusted = g.add_node(NodeKind::Step, "third-party", TrustZone::Untrusted);
1141        g.add_edge(step, secret, crate::graph::EdgeKind::HasAccessTo);
1142        g.add_edge(step, untrusted, crate::graph::EdgeKind::DelegatesTo);
1143        for (k, v) in meta {
1144            g.metadata.insert((*k).to_string(), (*v).to_string());
1145        }
1146        let paths = propagation_analysis(&g, DEFAULT_MAX_HOPS);
1147        (g, paths)
1148    }
1149
1150    #[test]
1151    fn graph_metadata_equals_matches_when_value_present() {
1152        let (graph, paths) = pr_context_graph_with_meta(&[("trigger", "pr")]);
1153        let yaml = r#"
1154id: r
1155name: r
1156severity: high
1157category: authority_propagation
1158match:
1159  graph_metadata:
1160    trigger:
1161      equals: pr
1162  source:
1163    metadata:
1164      variable_group: "true"
1165"#;
1166        let rule: CustomRule = serde_yaml::from_str(yaml).expect("yaml parses");
1167        assert_eq!(evaluate_custom_rules(&graph, &paths, &[rule]).len(), 1);
1168    }
1169
1170    #[test]
1171    fn graph_metadata_in_matches_any_of_listed_values() {
1172        let (graph, paths) = pr_context_graph_with_meta(&[("trigger", "merge_request_event")]);
1173        let yaml = r#"
1174id: r
1175name: r
1176severity: high
1177category: authority_propagation
1178match:
1179  graph_metadata:
1180    trigger:
1181      in: [pull_request_target, pr, merge_request_event]
1182"#;
1183        let rule: CustomRule = serde_yaml::from_str(yaml).expect("yaml parses");
1184        assert!(!evaluate_custom_rules(&graph, &paths, &[rule]).is_empty());
1185    }
1186
1187    #[test]
1188    fn graph_metadata_negation_excludes_unwanted_trigger() {
1189        // graph trigger=push, rule wants "not push" → must NOT fire.
1190        let (graph, paths) = pr_context_graph_with_meta(&[("trigger", "push")]);
1191        let yaml = r#"
1192id: r
1193name: r
1194severity: high
1195category: authority_propagation
1196match:
1197  graph_metadata:
1198    not:
1199      trigger:
1200        equals: push
1201"#;
1202        let rule: CustomRule = serde_yaml::from_str(yaml).expect("yaml parses");
1203        assert!(evaluate_custom_rules(&graph, &paths, &[rule]).is_empty());
1204
1205        // Inverse: trigger=pr, rule wants "not push" → fires.
1206        let (graph2, paths2) = pr_context_graph_with_meta(&[("trigger", "pr")]);
1207        let rule2: CustomRule = serde_yaml::from_str(yaml).expect("yaml parses");
1208        assert!(!evaluate_custom_rules(&graph2, &paths2, &[rule2]).is_empty());
1209    }
1210
1211    #[test]
1212    fn graph_metadata_missing_key_does_not_match_no_crash() {
1213        // Graph has no `trigger` metadata at all. `equals: pr` requires the key
1214        // to be present with that value → no findings, no panic.
1215        let (graph, paths) = pr_context_graph_with_meta(&[]);
1216        assert!(!graph.metadata.contains_key("trigger"));
1217        let yaml = r#"
1218id: r
1219name: r
1220severity: high
1221category: authority_propagation
1222match:
1223  graph_metadata:
1224    trigger:
1225      equals: pr
1226"#;
1227        let rule: CustomRule = serde_yaml::from_str(yaml).expect("yaml parses");
1228        let findings = evaluate_custom_rules(&graph, &paths, &[rule]);
1229        assert!(findings.is_empty(), "missing key must yield no findings");
1230    }
1231
1232    #[test]
1233    fn rules_without_graph_metadata_remain_backward_compatible() {
1234        // No `graph_metadata:` block → trivially matches regardless of graph
1235        // state. This is the v0.4-v0.9 behaviour and must keep working.
1236        let (graph, paths) = pr_context_graph_with_meta(&[("trigger", "anything")]);
1237        let yaml = r#"
1238id: r
1239name: r
1240severity: high
1241category: authority_propagation
1242match:
1243  source:
1244    metadata:
1245      variable_group: "true"
1246"#;
1247        let rule: CustomRule = serde_yaml::from_str(yaml).expect("yaml parses");
1248        assert_eq!(evaluate_custom_rules(&graph, &paths, &[rule]).len(), 1);
1249    }
1250
1251    // ── Gap C: image sinks + standalone node predicates ─────
1252
1253    /// Builds a graph with one Identity → Step → Image (Untrusted) chain.
1254    /// The Image node is reached via `UsesImage` so propagation_analysis
1255    /// produces a path whose sink is the Image — this is what lets custom
1256    /// rules use `sink: { node_type: image }`.
1257    fn graph_with_image_sink() -> (AuthorityGraph, Vec<PropagationPath>) {
1258        let mut g = AuthorityGraph::new(source());
1259        let identity = g.add_node(NodeKind::Identity, "GH_TOKEN", TrustZone::FirstParty);
1260        let step = g.add_node(NodeKind::Step, "publish", TrustZone::FirstParty);
1261        let image = g.add_node(
1262            NodeKind::Image,
1263            "third-party/deploy@v1",
1264            TrustZone::Untrusted,
1265        );
1266        g.add_edge(step, identity, crate::graph::EdgeKind::HasAccessTo);
1267        g.add_edge(step, image, crate::graph::EdgeKind::UsesImage);
1268        let paths = propagation_analysis(&g, DEFAULT_MAX_HOPS);
1269        (g, paths)
1270    }
1271
1272    #[test]
1273    fn sink_node_type_image_matches_image_path_endpoint() {
1274        let (graph, paths) = graph_with_image_sink();
1275        let yaml = r#"
1276id: r
1277name: r
1278severity: high
1279category: untrusted_with_authority
1280match:
1281  sink:
1282    node_type: image
1283    trust_zone: untrusted
1284"#;
1285        let rule: CustomRule = serde_yaml::from_str(yaml).expect("yaml parses");
1286        let findings = evaluate_custom_rules(&graph, &paths, &[rule]);
1287        assert!(
1288            !findings.is_empty(),
1289            "Image-as-sink must produce at least one finding"
1290        );
1291    }
1292
1293    #[test]
1294    fn standalone_matches_every_floating_image_in_graph() {
1295        // Two Image nodes: one floating (no `digest` metadata), one digest-pinned.
1296        let mut g = AuthorityGraph::new(source());
1297        let _step = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
1298        let _floating1 = g.add_node(NodeKind::Image, "alpine:latest", TrustZone::ThirdParty);
1299        let _floating2 = g.add_node(NodeKind::Image, "ubuntu:22.04", TrustZone::ThirdParty);
1300        let mut pinned_meta = HashMap::new();
1301        pinned_meta.insert("digest".to_string(), "sha256:abc".to_string());
1302        let _pinned = g.add_node_with_metadata(
1303            NodeKind::Image,
1304            "alpine@sha256:abc",
1305            TrustZone::ThirdParty,
1306            pinned_meta,
1307        );
1308        // Propagation paths irrelevant for standalone mode.
1309        let paths: Vec<PropagationPath> = Vec::new();
1310
1311        let yaml = r#"
1312id: floating_image_standalone
1313name: Floating image
1314severity: medium
1315category: unpinned_action
1316match:
1317  standalone:
1318    node_type: image
1319    not:
1320      metadata:
1321        digest:
1322          contains: "sha256:"
1323"#;
1324        let rule: CustomRule = serde_yaml::from_str(yaml).expect("yaml parses");
1325        let findings = evaluate_custom_rules(&g, &paths, &[rule]);
1326        assert_eq!(
1327            findings.len(),
1328            2,
1329            "standalone must fire once per floating Image node"
1330        );
1331    }
1332
1333    #[test]
1334    fn standalone_supports_in_operator() {
1335        let mut g = AuthorityGraph::new(source());
1336        let mut self_hosted_meta = HashMap::new();
1337        self_hosted_meta.insert("self_hosted".to_string(), "true".to_string());
1338        let _pool = g.add_node_with_metadata(
1339            NodeKind::Image,
1340            "self-pool",
1341            TrustZone::FirstParty,
1342            self_hosted_meta,
1343        );
1344        let _hosted = g.add_node(NodeKind::Image, "ubuntu-latest", TrustZone::ThirdParty);
1345        let paths: Vec<PropagationPath> = Vec::new();
1346
1347        let yaml = r#"
1348id: r
1349name: r
1350severity: high
1351category: authority_propagation
1352match:
1353  standalone:
1354    node_type: image
1355    metadata:
1356      self_hosted:
1357        in: ["true", "yes"]
1358"#;
1359        let rule: CustomRule = serde_yaml::from_str(yaml).expect("yaml parses");
1360        let findings = evaluate_custom_rules(&g, &paths, &[rule]);
1361        assert_eq!(findings.len(), 1, "in:[\"true\",\"yes\"] matches one node");
1362    }
1363
1364    #[test]
1365    fn standalone_still_honors_graph_metadata_gate() {
1366        // Standalone bypasses source/sink/path but `graph_metadata:` remains
1367        // a precondition — that's how PR-context node-shape rules work.
1368        let mut g_pr = AuthorityGraph::new(source());
1369        g_pr.metadata.insert("trigger".into(), "pr".into());
1370        g_pr.add_node(NodeKind::Image, "alpine:latest", TrustZone::ThirdParty);
1371
1372        let mut g_push = AuthorityGraph::new(source());
1373        g_push.metadata.insert("trigger".into(), "push".into());
1374        g_push.add_node(NodeKind::Image, "alpine:latest", TrustZone::ThirdParty);
1375
1376        let yaml = r#"
1377id: r
1378name: r
1379severity: low
1380category: unpinned_action
1381match:
1382  graph_metadata:
1383    trigger:
1384      equals: pr
1385  standalone:
1386    node_type: image
1387"#;
1388        let rule: CustomRule = serde_yaml::from_str(yaml).expect("yaml parses");
1389        assert_eq!(
1390            evaluate_custom_rules(&g_pr, &[], std::slice::from_ref(&rule)).len(),
1391            1,
1392            "fires on PR graph"
1393        );
1394        assert!(
1395            evaluate_custom_rules(&g_push, &[], std::slice::from_ref(&rule)).is_empty(),
1396            "graph_metadata gate must suppress on push graph"
1397        );
1398    }
1399
1400    #[test]
1401    fn standalone_ignores_source_sink_path_fields() {
1402        // Even when source/sink would never match (no propagation paths exist),
1403        // standalone fires per node-shape match. Documents the precedence rule.
1404        let mut g = AuthorityGraph::new(source());
1405        let _img = g.add_node(NodeKind::Image, "alpine:latest", TrustZone::ThirdParty);
1406        let paths: Vec<PropagationPath> = Vec::new();
1407
1408        let yaml = r#"
1409id: r
1410name: r
1411severity: low
1412category: unpinned_action
1413match:
1414  source:
1415    node_type: secret    # would never match anything in this graph
1416  standalone:
1417    node_type: image
1418"#;
1419        let rule: CustomRule = serde_yaml::from_str(yaml).expect("yaml parses");
1420        let findings = evaluate_custom_rules(&g, &paths, &[rule]);
1421        assert_eq!(findings.len(), 1);
1422    }
1423
1424    // ── Gap A: multi-doc YAML loading ───────────────────────
1425
1426    #[test]
1427    fn multi_doc_yaml_loads_each_document_as_separate_rule() {
1428        let yaml = r#"
1429id: rule_a
1430name: First rule
1431severity: high
1432category: authority_propagation
1433match:
1434  source:
1435    node_type: secret
1436---
1437id: rule_b
1438name: Second rule
1439severity: critical
1440category: untrusted_with_authority
1441match:
1442  sink:
1443    trust_zone: untrusted
1444---
1445id: rule_c
1446name: Third rule
1447severity: medium
1448category: unpinned_action
1449"#;
1450        let rules = parse_rules_multi_doc(yaml).expect("multi-doc must parse");
1451        assert_eq!(rules.len(), 3, "expected 3 rules from 3-doc YAML");
1452        assert_eq!(rules[0].id, "rule_a");
1453        assert_eq!(rules[1].id, "rule_b");
1454        assert_eq!(rules[2].id, "rule_c");
1455        assert_eq!(rules[1].severity, Severity::Critical);
1456    }
1457
1458    #[test]
1459    fn single_doc_yaml_still_loads_identically() {
1460        let yaml = r#"
1461id: solo
1462name: Solo rule
1463severity: high
1464category: authority_propagation
1465"#;
1466        let rules = parse_rules_multi_doc(yaml).expect("single-doc must parse");
1467        assert_eq!(rules.len(), 1);
1468        assert_eq!(rules[0].id, "solo");
1469    }
1470
1471    #[test]
1472    fn multi_doc_with_empty_leading_document_is_skipped() {
1473        let yaml = r#"---
1474---
1475id: only
1476name: only
1477severity: low
1478category: authority_propagation
1479"#;
1480        let rules = parse_rules_multi_doc(yaml).expect("must parse");
1481        assert_eq!(rules.len(), 1);
1482        assert_eq!(rules[0].id, "only");
1483    }
1484
1485    #[test]
1486    fn load_rules_dir_loads_multi_doc_files() {
1487        let tmp =
1488            std::env::temp_dir().join(format!("taudit-custom-rules-multi-{}", std::process::id()));
1489        fs::create_dir_all(&tmp).unwrap();
1490        let path = tmp.join("bundle.yml");
1491        fs::write(
1492            &path,
1493            r#"
1494id: a
1495name: a
1496severity: high
1497category: authority_propagation
1498---
1499id: b
1500name: b
1501severity: medium
1502category: unpinned_action
1503---
1504id: c
1505name: c
1506severity: low
1507category: authority_propagation
1508"#,
1509        )
1510        .unwrap();
1511
1512        let rules = load_rules_dir(&tmp).expect("multi-doc file must load");
1513        assert_eq!(rules.len(), 3, "expected 3 rules from one bundled file");
1514
1515        let _ = fs::remove_dir_all(&tmp);
1516    }
1517
1518    // ── Provenance: every custom-rule finding carries source path ────────
1519
1520    #[test]
1521    fn loaded_rule_threads_source_file_into_findings() {
1522        let tmp = std::env::temp_dir().join(format!("taudit-custom-prov-{}", std::process::id()));
1523        fs::create_dir_all(&tmp).unwrap();
1524        let path = tmp.join("provenance.yml");
1525        fs::write(
1526            &path,
1527            r#"
1528id: from_disk
1529name: From disk
1530description: planted invariant
1531severity: critical
1532category: authority_propagation
1533match:
1534  source:
1535    trust_zone: first_party
1536  sink:
1537    trust_zone: untrusted
1538"#,
1539        )
1540        .unwrap();
1541
1542        let rules = load_rules_dir(&tmp).expect("rules load");
1543        assert_eq!(rules.len(), 1);
1544        // The loader stamps source_file on the rule itself.
1545        assert_eq!(rules[0].source_file.as_deref(), Some(path.as_path()));
1546
1547        let (graph, paths) = build_graph_with_paths();
1548        let findings = evaluate_custom_rules(&graph, &paths, &rules);
1549        assert_eq!(findings.len(), 1);
1550        match &findings[0].source {
1551            FindingSource::Custom { source_file } => {
1552                assert_eq!(
1553                    source_file, &path,
1554                    "custom finding must carry the YAML path it was loaded from"
1555                );
1556            }
1557            other => panic!("expected FindingSource::Custom, got {other:?}"),
1558        }
1559
1560        let _ = fs::remove_dir_all(&tmp);
1561    }
1562
1563    #[test]
1564    fn in_memory_custom_rule_emits_custom_source_with_empty_path() {
1565        // Rules constructed in-memory (tests, stdin pipelines) never go
1566        // through the loader and therefore have no source path — the finding
1567        // must still be tagged as Custom (not silently mistakable for built-in)
1568        // so any operator inspecting a SIEM alert immediately sees provenance.
1569        let (graph, paths) = build_graph_with_paths();
1570        let rule = CustomRule {
1571            id: "in_mem".into(),
1572            name: "in-memory".into(),
1573            description: String::new(),
1574            severity: Severity::High,
1575            category: FindingCategory::AuthorityPropagation,
1576            match_spec: MatchSpec::default(),
1577            source_file: None,
1578        };
1579        let findings = evaluate_custom_rules(&graph, &paths, &[rule]);
1580        assert!(!findings.is_empty(), "in-mem rule must still match");
1581        for f in &findings {
1582            match &f.source {
1583                FindingSource::Custom { source_file } => {
1584                    assert!(
1585                        source_file.as_os_str().is_empty(),
1586                        "in-mem custom rule emits Custom with empty path, not BuiltIn"
1587                    );
1588                }
1589                other => {
1590                    panic!("in-memory custom rule must still produce Custom source, got {other:?}")
1591                }
1592            }
1593        }
1594    }
1595
1596    #[test]
1597    fn unknown_metadata_operator_is_rejected() {
1598        let yaml = r#"
1599id: r
1600name: r
1601severity: high
1602category: authority_propagation
1603match:
1604  source:
1605    metadata:
1606      role:
1607        starts_with: adm
1608"#;
1609        let err = serde_yaml::from_str::<CustomRule>(yaml)
1610            .expect_err("unknown operator must be rejected");
1611        let msg = err.to_string();
1612        // serde_yaml's untagged-enum error doesn't always echo the unknown
1613        // field name; the important guarantee is that the parse fails (so
1614        // typos in operator names don't silently match nothing).
1615        assert!(
1616            msg.contains("metadata") || msg.contains("variant"),
1617            "parse should fail with a meaningful location: {msg}"
1618        );
1619    }
1620
1621    // ── Symlink protection (red-team R2 #4) ─────────────────
1622    //
1623    // These tests use Unix symlinks. Skipped on Windows where the test
1624    // harness usually lacks SeCreateSymbolicLinkPrivilege.
1625
1626    #[cfg(unix)]
1627    fn unique_tmp(prefix: &str) -> PathBuf {
1628        use std::sync::atomic::{AtomicU64, Ordering};
1629        static COUNTER: AtomicU64 = AtomicU64::new(0);
1630        let n = COUNTER.fetch_add(1, Ordering::SeqCst);
1631        std::env::temp_dir().join(format!(
1632            "taudit-symlink-{prefix}-{}-{n}",
1633            std::process::id()
1634        ))
1635    }
1636
1637    #[cfg(unix)]
1638    fn write_minimal_rule(path: &Path, id: &str) {
1639        fs::write(
1640            path,
1641            format!("id: {id}\nname: {id}\nseverity: high\ncategory: authority_propagation\n"),
1642        )
1643        .unwrap();
1644    }
1645
1646    #[test]
1647    #[cfg(unix)]
1648    fn load_rules_dir_follows_in_tree_symlink_with_warning() {
1649        use std::os::unix::fs::symlink;
1650
1651        let tmp = unique_tmp("intree");
1652        fs::create_dir_all(&tmp).unwrap();
1653
1654        let real = tmp.join("real.yml");
1655        write_minimal_rule(&real, "in_tree");
1656        let link = tmp.join("alias.yml");
1657        symlink(&real, &link).unwrap();
1658
1659        // Default opts: in-tree symlinks are followed.
1660        let rules = load_rules_dir(&tmp).expect("in-tree symlink must be loaded");
1661        // Two entries: the real file + the alias symlink (loaded twice).
1662        assert_eq!(
1663            rules.len(),
1664            2,
1665            "expected 2 rules (real + alias), got {rules:?}"
1666        );
1667
1668        let _ = fs::remove_dir_all(&tmp);
1669    }
1670
1671    #[test]
1672    #[cfg(unix)]
1673    fn load_rules_dir_refuses_out_of_tree_symlink_by_default() {
1674        use std::os::unix::fs::symlink;
1675
1676        let tmp = unique_tmp("outoftree-refuse");
1677        fs::create_dir_all(&tmp).unwrap();
1678
1679        let outside_dir = unique_tmp("outoftree-target");
1680        fs::create_dir_all(&outside_dir).unwrap();
1681        let outside_file = outside_dir.join("evil.yml");
1682        write_minimal_rule(&outside_file, "evil");
1683
1684        let link = tmp.join("legit.yml");
1685        symlink(&outside_file, &link).unwrap();
1686
1687        let errs = load_rules_dir(&tmp).expect_err("out-of-tree symlink must be refused");
1688        assert_eq!(errs.len(), 1);
1689        assert!(
1690            matches!(errs[0], CustomRuleError::SymlinkOutsideDir { .. }),
1691            "expected SymlinkOutsideDir, got {:?}",
1692            errs[0]
1693        );
1694        let msg = errs[0].to_string();
1695        assert!(
1696            msg.contains("legit.yml") && msg.contains("evil.yml"),
1697            "error should name both link and target: {msg}"
1698        );
1699
1700        let _ = fs::remove_dir_all(&tmp);
1701        let _ = fs::remove_dir_all(&outside_dir);
1702    }
1703
1704    #[test]
1705    #[cfg(unix)]
1706    fn load_rules_dir_follows_out_of_tree_symlink_with_override() {
1707        use std::os::unix::fs::symlink;
1708
1709        let tmp = unique_tmp("outoftree-override");
1710        fs::create_dir_all(&tmp).unwrap();
1711
1712        let outside_dir = unique_tmp("outoftree-target-override");
1713        fs::create_dir_all(&outside_dir).unwrap();
1714        let outside_file = outside_dir.join("external.yml");
1715        write_minimal_rule(&outside_file, "external");
1716
1717        let link = tmp.join("aliased.yml");
1718        symlink(&outside_file, &link).unwrap();
1719
1720        let rules = load_rules_dir_with_opts(&tmp, true)
1721            .expect("override flag must allow external symlinks");
1722        assert_eq!(rules.len(), 1);
1723        assert_eq!(rules[0].id, "external");
1724
1725        let _ = fs::remove_dir_all(&tmp);
1726        let _ = fs::remove_dir_all(&outside_dir);
1727    }
1728}