1use crate::finding::{
2 Finding, FindingCategory, FindingExtras, FindingSource, Recommendation, Severity,
3};
4use crate::graph::{AuthorityGraph, NodeKind, TrustZone};
5use crate::propagation::PropagationPath;
6use serde::de::{self, MapAccess, Visitor};
7use serde::{Deserialize, Deserializer};
8use std::collections::{HashMap, HashSet};
9use std::fmt;
10use std::fs;
11use std::io;
12use std::path::{Path, PathBuf};
13
/// Largest custom-rule file, in bytes (2 MiB), that `read_to_string_capped`
/// will accept; bigger files are reported as `CustomRuleError::FileTooLarge`.
const MAX_INPUT_BYTES: u64 = 2 * 1024 * 1024;
/// Largest number of entries `deserialize_capped_vec` accepts in a list field.
const MAX_CUSTOM_RULE_VEC_ITEMS: usize = 1024;
16
/// Textual description of the accepted rule-id grammar.
///
/// The pattern is only interpolated into error messages; the grammar itself
/// is enforced by hand in [`validate_rule_id`].
const RULE_ID_REGEX: &str = "^[A-Za-z_][A-Za-z0-9_-]{0,63}$";

/// Checks that `id` is a well-formed rule identifier.
///
/// An identifier is accepted when it is non-empty, at most 64 bytes long,
/// starts with an ASCII letter or underscore, and continues with ASCII
/// letters, digits, underscores or hyphens only.
///
/// # Errors
///
/// Returns a human-readable message naming the first violated constraint,
/// checked in this order: emptiness, length, first character, remaining
/// characters.
fn validate_rule_id(id: &str) -> Result<(), String> {
    let mut rest = id.chars();

    // Pulling the first character doubles as the emptiness check.
    let first = match rest.next() {
        Some(c) => c,
        None => {
            return Err(format!(
                "rule id must be non-empty (allowed: {RULE_ID_REGEX})"
            ));
        }
    };

    if id.len() > 64 {
        return Err(format!(
            "rule id '{id}' exceeds 64 characters (allowed: {RULE_ID_REGEX})"
        ));
    }

    if first != '_' && !first.is_ascii_alphabetic() {
        return Err(format!(
            "rule id '{id}' must start with an ASCII letter or underscore (allowed: {RULE_ID_REGEX})"
        ));
    }

    // Report the first offending trailing character, if there is one.
    match rest.find(|&c| !(c.is_ascii_alphanumeric() || c == '_' || c == '-')) {
        Some(c) => Err(format!(
            "rule id '{id}' contains invalid character '{c}' (allowed: ASCII letters, digits, underscore, hyphen — {RULE_ID_REGEX})"
        )),
        None => Ok(()),
    }
}
52
53fn deserialize_validated_id<'de, D>(deserializer: D) -> Result<String, D::Error>
59where
60 D: Deserializer<'de>,
61{
62 let raw = String::deserialize(deserializer)?;
63 validate_rule_id(&raw).map_err(de::Error::custom)?;
64 Ok(raw)
65}
66
67fn deserialize_capped_vec<'de, D, T>(deserializer: D) -> Result<Vec<T>, D::Error>
68where
69 D: Deserializer<'de>,
70 T: Deserialize<'de>,
71{
72 let values = Vec::<T>::deserialize(deserializer)?;
73 if values.len() > MAX_CUSTOM_RULE_VEC_ITEMS {
74 return Err(de::Error::custom(format!(
75 "custom-rule list exceeds {MAX_CUSTOM_RULE_VEC_ITEMS} entries"
76 )));
77 }
78 Ok(values)
79}
80
81#[derive(Debug, Clone, Deserialize)]
84pub struct CustomRule {
85 #[serde(deserialize_with = "deserialize_validated_id")]
91 pub id: String,
92 pub name: String,
93 #[serde(default)]
94 pub description: String,
95 pub severity: Severity,
96 pub category: FindingCategory,
97 #[serde(rename = "match", default)]
98 pub match_spec: MatchSpec,
99 #[serde(default, skip)]
107 pub source_file: Option<PathBuf>,
108}
109
110#[derive(Debug, Clone, Default, Deserialize)]
111pub struct MatchSpec {
112 #[serde(default)]
113 pub source: NodeMatcher,
114 #[serde(default)]
115 pub sink: NodeMatcher,
116 #[serde(default)]
117 pub path: PathMatcher,
118 #[serde(default)]
124 pub graph_metadata: MetadataMatcher,
125 #[serde(default)]
132 pub standalone: Option<NodeMatcher>,
133}
134
/// Maximum number of elements accepted in a list-valued rule field
/// (`OneOrMany` sequences and the metadata `in:` list).
pub const MAX_RULE_VEC_LEN: usize = 1024;
143
/// A field value that is either a single item or a list of items.
#[derive(Debug, Clone)]
pub enum OneOrMany<T> {
    /// Exactly one value.
    One(T),
    /// A list of values.
    Many(Vec<T>),
}
155
156impl<'de, T> Deserialize<'de> for OneOrMany<T>
168where
169 T: Deserialize<'de>,
170{
171 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
172 where
173 D: Deserializer<'de>,
174 {
175 struct OneOrManyVisitor<T> {
176 _phantom: std::marker::PhantomData<T>,
177 }
178
179 impl<'de, T> Visitor<'de> for OneOrManyVisitor<T>
180 where
181 T: Deserialize<'de>,
182 {
183 type Value = OneOrMany<T>;
184
185 fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
186 f.write_str("a single value or a sequence (max 1024 elements)")
187 }
188
189 fn visit_bool<E: de::Error>(self, v: bool) -> Result<Self::Value, E> {
195 T::deserialize(serde::de::value::BoolDeserializer::new(v)).map(OneOrMany::One)
196 }
197 fn visit_i64<E: de::Error>(self, v: i64) -> Result<Self::Value, E> {
198 T::deserialize(serde::de::value::I64Deserializer::new(v)).map(OneOrMany::One)
199 }
200 fn visit_u64<E: de::Error>(self, v: u64) -> Result<Self::Value, E> {
201 T::deserialize(serde::de::value::U64Deserializer::new(v)).map(OneOrMany::One)
202 }
203 fn visit_f64<E: de::Error>(self, v: f64) -> Result<Self::Value, E> {
204 T::deserialize(serde::de::value::F64Deserializer::new(v)).map(OneOrMany::One)
205 }
206 fn visit_str<E: de::Error>(self, v: &str) -> Result<Self::Value, E> {
207 T::deserialize(serde::de::value::StrDeserializer::new(v)).map(OneOrMany::One)
208 }
209 fn visit_string<E: de::Error>(self, v: String) -> Result<Self::Value, E> {
210 T::deserialize(serde::de::value::StringDeserializer::new(v)).map(OneOrMany::One)
211 }
212 fn visit_unit<E: de::Error>(self) -> Result<Self::Value, E> {
213 T::deserialize(serde::de::value::UnitDeserializer::new()).map(OneOrMany::One)
214 }
215
216 fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
217 where
218 A: de::SeqAccess<'de>,
219 {
220 let cap_hint = seq
224 .size_hint()
225 .map(|h| h.min(MAX_RULE_VEC_LEN))
226 .unwrap_or(0);
227 let mut out: Vec<T> = Vec::with_capacity(cap_hint);
228 while let Some(item) = seq.next_element::<T>()? {
229 if out.len() >= MAX_RULE_VEC_LEN {
230 return Err(de::Error::custom(format!(
231 "list field exceeds maximum of {MAX_RULE_VEC_LEN} elements; \
232 split into multiple rules instead"
233 )));
234 }
235 out.push(item);
236 }
237 Ok(OneOrMany::Many(out))
238 }
239 }
240
241 deserializer.deserialize_any(OneOrManyVisitor::<T> {
242 _phantom: std::marker::PhantomData,
243 })
244 }
245}
246
247impl<T: PartialEq> OneOrMany<T> {
248 fn contains(&self, value: &T) -> bool {
249 match self {
250 OneOrMany::One(v) => v == value,
251 OneOrMany::Many(vs) => vs.iter().any(|v| v == value),
252 }
253 }
254}
255
/// Predicate on a single metadata value.
///
/// Deserialized untagged, so the variants are tried in declaration order:
/// a bare string becomes `Equals`, anything else is tried as `Op`.
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
pub enum MetadataPredicate {
    /// Shorthand form: the value must equal this string exactly.
    Equals(String),
    /// Operator form; see `MetadataOp`.
    Op(MetadataOp),
}
267
268#[derive(Debug, Clone, Default, Deserialize)]
269#[serde(deny_unknown_fields)]
270pub struct MetadataOp {
271 #[serde(default)]
272 pub equals: Option<String>,
273 #[serde(default)]
274 pub not_equals: Option<String>,
275 #[serde(default)]
277 pub contains: Option<String>,
278 #[serde(
282 default,
283 rename = "in",
284 deserialize_with = "deserialize_capped_opt_vec_string"
285 )]
286 pub in_: Option<Vec<String>>,
287}
288
289fn deserialize_capped_opt_vec_string<'de, D>(
294 deserializer: D,
295) -> Result<Option<Vec<String>>, D::Error>
296where
297 D: Deserializer<'de>,
298{
299 struct CappedVecStringVisitor;
300
301 impl<'de> Visitor<'de> for CappedVecStringVisitor {
302 type Value = Option<Vec<String>>;
303
304 fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
305 f.write_str("a sequence of strings (max 1024 elements) or null")
306 }
307
308 fn visit_unit<E: de::Error>(self) -> Result<Self::Value, E> {
309 Ok(None)
310 }
311
312 fn visit_none<E: de::Error>(self) -> Result<Self::Value, E> {
313 Ok(None)
314 }
315
316 fn visit_some<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
317 where
318 D: Deserializer<'de>,
319 {
320 deserializer.deserialize_seq(self)
321 }
322
323 fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
324 where
325 A: de::SeqAccess<'de>,
326 {
327 let cap_hint = seq
328 .size_hint()
329 .map(|h| h.min(MAX_RULE_VEC_LEN))
330 .unwrap_or(0);
331 let mut out: Vec<String> = Vec::with_capacity(cap_hint);
332 while let Some(item) = seq.next_element::<String>()? {
333 if out.len() >= MAX_RULE_VEC_LEN {
334 return Err(de::Error::custom(format!(
335 "metadata `in:` list exceeds maximum of {MAX_RULE_VEC_LEN} \
336 elements; split into multiple rules instead"
337 )));
338 }
339 out.push(item);
340 }
341 Ok(Some(out))
342 }
343 }
344
345 deserializer.deserialize_option(CappedVecStringVisitor)
346}
347
348impl MetadataOp {
349 fn matches(&self, actual: Option<&String>) -> bool {
350 if let Some(want) = &self.equals {
353 if actual.map(|s| s.as_str()) != Some(want.as_str()) {
354 return false;
355 }
356 }
357 if let Some(want) = &self.not_equals {
358 if actual.map(|s| s.as_str()) == Some(want.as_str()) {
359 return false;
360 }
361 }
362 if let Some(needle) = &self.contains {
363 match actual {
364 Some(s) if s.contains(needle.as_str()) => {}
365 _ => return false,
366 }
367 }
368 if let Some(allowed) = &self.in_ {
369 match actual {
370 Some(s) if allowed.iter().any(|a| a == s) => {}
371 _ => return false,
372 }
373 }
374 true
375 }
376}
377
378impl MetadataPredicate {
379 fn matches(&self, actual: Option<&String>) -> bool {
380 match self {
381 MetadataPredicate::Equals(want) => actual.map(|s| s.as_str()) == Some(want.as_str()),
382 MetadataPredicate::Op(op) => op.matches(actual),
383 }
384 }
385}
386
/// A set of per-key metadata predicates with an optional negated sub-matcher.
#[derive(Debug, Clone, Default)]
pub struct MetadataMatcher {
    /// Predicate for each metadata key; all of them must hold.
    pub fields: HashMap<String, MetadataPredicate>,
    /// Nested matcher that must NOT match for this matcher to match.
    pub not: Option<Box<MetadataMatcher>>,
}
395
396impl MetadataMatcher {
397 fn matches(&self, metadata: &HashMap<String, String>) -> bool {
398 for (key, pred) in &self.fields {
399 if !pred.matches(metadata.get(key)) {
400 return false;
401 }
402 }
403 if let Some(inner) = &self.not {
404 if inner.matches(metadata) {
405 return false;
406 }
407 }
408 true
409 }
410
411 fn is_empty(&self) -> bool {
412 self.fields.is_empty() && self.not.is_none()
413 }
414}
415
416impl<'de> Deserialize<'de> for MetadataMatcher {
418 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
419 where
420 D: Deserializer<'de>,
421 {
422 struct MetadataMatcherVisitor;
423
424 impl<'de> Visitor<'de> for MetadataMatcherVisitor {
425 type Value = MetadataMatcher;
426
427 fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
428 f.write_str("a metadata predicate map (field -> string|operator) with optional `not:` sub-map")
429 }
430
431 fn visit_map<M>(self, mut map: M) -> Result<MetadataMatcher, M::Error>
432 where
433 M: MapAccess<'de>,
434 {
435 let mut fields: HashMap<String, MetadataPredicate> = HashMap::new();
436 let mut not: Option<Box<MetadataMatcher>> = None;
437
438 while let Some(key) = map.next_key::<String>()? {
439 if key == "not" {
440 if not.is_some() {
441 return Err(de::Error::duplicate_field("not"));
442 }
443 let inner: MetadataMatcher = map.next_value()?;
444 not = Some(Box::new(inner));
445 } else {
446 let value: MetadataPredicate = map.next_value()?;
447 if fields.insert(key.clone(), value).is_some() {
448 return Err(de::Error::custom(format!(
449 "duplicate metadata field `{key}`"
450 )));
451 }
452 }
453 }
454
455 Ok(MetadataMatcher { fields, not })
456 }
457 }
458
459 deserializer.deserialize_map(MetadataMatcherVisitor)
460 }
461}
462
463#[derive(Debug, Clone, Default, Deserialize)]
464pub struct NodeMatcher {
465 #[serde(default)]
467 pub node_type: Option<OneOrMany<NodeKind>>,
468 #[serde(default)]
470 pub trust_zone: Option<OneOrMany<TrustZone>>,
471 #[serde(default)]
472 pub metadata: MetadataMatcher,
473 #[serde(default)]
476 pub not: Option<Box<NodeMatcher>>,
477}
478
479#[derive(Debug, Clone, Default, Deserialize)]
480pub struct PathMatcher {
481 #[serde(default, deserialize_with = "deserialize_capped_vec")]
482 pub crosses_to: Vec<TrustZone>,
483}
484
/// Errors produced while discovering, reading, or parsing custom rule files.
#[derive(Debug)]
pub enum CustomRuleError {
    /// An I/O operation on the given path failed.
    FileRead(PathBuf, io::Error),
    /// The file at the given path could not be parsed as a custom rule.
    YamlParse(PathBuf, serde_yaml::Error),
    /// The file is larger than the permitted input size.
    FileTooLarge {
        /// Offending file.
        path: PathBuf,
        /// Size limit that was exceeded, in bytes.
        max_bytes: u64,
        /// Observed size, in bytes.
        actual_bytes: u64,
    },
    /// A symlink resolved to a location outside the rules directory and
    /// following external symlinks was not enabled.
    SymlinkOutsideDir {
        /// Path of the symlink itself.
        link: PathBuf,
        /// Canonical path the symlink resolves to.
        target: PathBuf,
    },
}
503
504impl fmt::Display for CustomRuleError {
505 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
506 match self {
507 CustomRuleError::FileRead(path, err) => {
508 write!(
509 f,
510 "failed to read custom rule file {}: {err}",
511 path.display()
512 )
513 }
514 CustomRuleError::YamlParse(path, err) => {
515 write!(
516 f,
517 "failed to parse custom rule file {}: {err}",
518 path.display()
519 )
520 }
521 CustomRuleError::FileTooLarge {
522 path,
523 max_bytes,
524 actual_bytes,
525 } => {
526 write!(
527 f,
528 "custom rule file {} exceeds {max_bytes} byte limit ({actual_bytes} bytes)",
529 path.display()
530 )
531 }
532 CustomRuleError::SymlinkOutsideDir { link, target } => {
533 write!(
534 f,
535 "refusing to follow symlink {} → {} (target outside --invariants-dir; potential symlink traversal). Use --invariants-allow-external-symlinks to override.",
536 link.display(),
537 target.display()
538 )
539 }
540 }
541 }
542}
543
544impl std::error::Error for CustomRuleError {
545 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
546 match self {
547 CustomRuleError::FileRead(_, err) => Some(err),
548 CustomRuleError::YamlParse(_, err) => Some(err),
549 CustomRuleError::FileTooLarge { .. } => None,
550 CustomRuleError::SymlinkOutsideDir { .. } => None,
551 }
552 }
553}
554
555pub fn load_rules_dir(dir: &Path) -> Result<Vec<CustomRule>, Vec<CustomRuleError>> {
563 load_rules_dir_with_opts(dir, false)
564}
565
566pub fn load_rules_dir_with_opts(
591 dir: &Path,
592 allow_external_symlinks: bool,
593) -> Result<Vec<CustomRule>, Vec<CustomRuleError>> {
594 let canonical_dir = fs::canonicalize(dir).unwrap_or_else(|_| dir.to_path_buf());
599
600 let mut errors: Vec<CustomRuleError> = Vec::new();
601 let mut files: Vec<(PathBuf, PathBuf)> = Vec::new();
606 let mut seen: HashSet<PathBuf> = HashSet::new();
607 let mut visited_dirs: HashSet<PathBuf> = HashSet::new();
610 visited_dirs.insert(canonical_dir.clone());
611
612 let mut stack: Vec<PathBuf> = vec![dir.to_path_buf()];
613
614 while let Some(current) = stack.pop() {
615 let read_dir = match fs::read_dir(¤t) {
616 Ok(rd) => rd,
617 Err(err) => {
618 errors.push(CustomRuleError::FileRead(current, err));
619 continue;
620 }
621 };
622
623 for entry in read_dir.flatten() {
626 let path = entry.path();
627
628 let is_symlink = entry
631 .file_type()
632 .map(|ft| ft.is_symlink())
633 .unwrap_or_else(|_| path.is_symlink());
634
635 let canonical_target = match fs::canonicalize(&path) {
639 Ok(t) => t,
640 Err(err) => {
641 errors.push(CustomRuleError::FileRead(path.clone(), err));
642 continue;
643 }
644 };
645
646 if is_symlink {
649 let in_tree = canonical_target.starts_with(&canonical_dir);
650 if !in_tree {
651 if allow_external_symlinks {
652 eprintln!(
653 "WARNING: following external symlink {} → {} (allowed by --invariants-allow-external-symlinks)",
654 path.display(),
655 canonical_target.display()
656 );
657 } else {
658 errors.push(CustomRuleError::SymlinkOutsideDir {
659 link: path,
660 target: canonical_target,
661 });
662 continue;
663 }
664 } else {
665 eprintln!(
666 "WARNING: following symlink {} → {}",
667 path.display(),
668 canonical_target.display()
669 );
670 }
671 }
672
673 let meta = match fs::metadata(&path) {
676 Ok(m) => m,
677 Err(err) => {
678 errors.push(CustomRuleError::FileRead(path.clone(), err));
679 continue;
680 }
681 };
682
683 if meta.is_dir() {
684 if visited_dirs.insert(canonical_target.clone()) {
687 stack.push(path);
688 }
689 continue;
690 }
691
692 if !meta.is_file() {
693 continue;
694 }
695 match path.extension().and_then(|e| e.to_str()) {
696 Some("yml") | Some("yaml") => {}
697 _ => continue,
698 }
699
700 if !seen.insert(canonical_target.clone()) {
703 eprintln!(
704 "WARNING: symlink {} resolved to the same file already loaded; skipping",
705 path.display()
706 );
707 continue;
708 }
709 files.push((path, canonical_target));
710 }
711 }
712
713 files.sort_by(|a, b| a.0.cmp(&b.0));
716
717 let mut rules = Vec::new();
718 for (path, _canonical) in files {
719 match read_to_string_capped(&path) {
720 Ok(content) => match parse_rules_multi_doc_with_source(&content, Some(&path)) {
721 Ok(mut parsed) => rules.append(&mut parsed),
722 Err(err) => errors.push(CustomRuleError::YamlParse(path, err)),
723 },
724 Err(err) => errors.push(err),
725 }
726 }
727
728 if errors.is_empty() {
729 Ok(rules)
730 } else {
731 Err(errors)
732 }
733}
734
735fn read_to_string_capped(path: &Path) -> Result<String, CustomRuleError> {
736 let metadata = fs::metadata(path).map_err(|err| CustomRuleError::FileRead(path.into(), err))?;
737 if metadata.len() > MAX_INPUT_BYTES {
738 return Err(CustomRuleError::FileTooLarge {
739 path: path.into(),
740 max_bytes: MAX_INPUT_BYTES,
741 actual_bytes: metadata.len(),
742 });
743 }
744 let content =
745 fs::read_to_string(path).map_err(|err| CustomRuleError::FileRead(path.into(), err))?;
746 if content.len() as u64 > MAX_INPUT_BYTES {
747 return Err(CustomRuleError::FileTooLarge {
748 path: path.into(),
749 max_bytes: MAX_INPUT_BYTES,
750 actual_bytes: content.len() as u64,
751 });
752 }
753 Ok(content)
754}
755
756pub fn parse_rules_multi_doc(content: &str) -> Result<Vec<CustomRule>, serde_yaml::Error> {
765 parse_rules_multi_doc_with_source(content, None)
766}
767
768pub fn parse_rules_multi_doc_with_source(
773 content: &str,
774 source: Option<&Path>,
775) -> Result<Vec<CustomRule>, serde_yaml::Error> {
776 let mut rules = Vec::new();
777 for doc in serde_yaml::Deserializer::from_str(content) {
778 let value = serde_yaml::Value::deserialize(doc)?;
781 if value.is_null() {
782 continue;
783 }
784 let mut rule: CustomRule = serde_yaml::from_value(value)?;
785 rule.source_file = source.map(|p| p.to_path_buf());
786 rules.push(rule);
787 }
788 Ok(rules)
789}
790
791impl NodeMatcher {
792 fn matches(&self, node: &crate::graph::Node) -> bool {
793 if let Some(kinds) = &self.node_type {
794 if !kinds.contains(&node.kind) {
795 return false;
796 }
797 }
798 if let Some(zones) = &self.trust_zone {
799 if !zones.contains(&node.trust_zone) {
800 return false;
801 }
802 }
803 if !self.metadata.matches(&node.metadata) {
804 return false;
805 }
806 if let Some(inner) = &self.not {
807 if inner.matches(node) {
808 return false;
809 }
810 }
811 true
812 }
813
814 #[allow(dead_code)]
816 fn is_wildcard(&self) -> bool {
817 self.node_type.is_none()
818 && self.trust_zone.is_none()
819 && self.metadata.is_empty()
820 && self.not.is_none()
821 }
822}
823
824impl PathMatcher {
825 fn matches(&self, path: &PropagationPath) -> bool {
826 if self.crosses_to.is_empty() {
827 return true;
828 }
829 match path.boundary_crossing {
830 Some((_, to_zone)) => self.crosses_to.contains(&to_zone),
831 None => false,
832 }
833 }
834}
835
836pub fn evaluate_custom_rules(
840 graph: &AuthorityGraph,
841 paths: &[PropagationPath],
842 rules: &[CustomRule],
843) -> Vec<Finding> {
844 let mut findings = Vec::new();
845
846 for rule in rules {
847 if let Some(matcher) = &rule.match_spec.standalone {
853 if !rule.match_spec.graph_metadata.matches(&graph.metadata) {
854 continue;
855 }
856 for node in &graph.nodes {
857 if !matcher.matches(node) {
858 continue;
859 }
860 findings.push(Finding {
861 severity: rule.severity,
862 category: rule.category,
863 nodes_involved: vec![node.id],
864 message: format!("[{}] {}: {}", rule.id, rule.name, node.name),
865 recommendation: Recommendation::Manual {
866 action: if rule.description.is_empty() {
867 format!("Review custom rule '{}'", rule.id)
868 } else {
869 rule.description.clone()
870 },
871 },
872 path: None,
873 source: custom_source(rule),
874 extras: FindingExtras::default(),
875 });
876 }
877 continue;
878 }
879
880 if !rule.match_spec.graph_metadata.matches(&graph.metadata) {
885 continue;
886 }
887
888 for path in paths {
889 let source_node = match graph.node(path.source) {
890 Some(n) => n,
891 None => continue,
892 };
893 let sink_node = match graph.node(path.sink) {
894 Some(n) => n,
895 None => continue,
896 };
897
898 if !rule.match_spec.source.matches(source_node) {
899 continue;
900 }
901 if !rule.match_spec.sink.matches(sink_node) {
902 continue;
903 }
904 if !rule.match_spec.path.matches(path) {
905 continue;
906 }
907
908 findings.push(Finding {
909 severity: rule.severity,
910 category: rule.category,
911 nodes_involved: vec![path.source, path.sink],
912 message: format!(
913 "[{}] {}: {} -> {}",
914 rule.id, rule.name, source_node.name, sink_node.name
915 ),
916 recommendation: Recommendation::Manual {
917 action: if rule.description.is_empty() {
918 format!("Review custom rule '{}'", rule.id)
919 } else {
920 rule.description.clone()
921 },
922 },
923 path: Some(path.clone()),
924 source: custom_source(rule),
925 extras: FindingExtras::default(),
926 });
927 }
928 }
929
930 findings
931}
932
933fn custom_source(rule: &CustomRule) -> FindingSource {
938 FindingSource::Custom {
939 source_file: rule.source_file.clone().unwrap_or_default(),
940 }
941}
942
943#[cfg(test)]
944mod tests {
945 use super::*;
946 use crate::graph::{AuthorityGraph, EdgeKind, PipelineSource};
947 use crate::propagation::{propagation_analysis, DEFAULT_MAX_HOPS};
948
949 fn source() -> PipelineSource {
950 PipelineSource {
951 file: "test.yml".into(),
952 repo: None,
953 git_ref: None,
954 commit_sha: None,
955 }
956 }
957
958 fn build_graph_with_paths() -> (AuthorityGraph, Vec<PropagationPath>) {
959 let mut g = AuthorityGraph::new(source());
960 let secret = g.add_node(NodeKind::Secret, "API_KEY", TrustZone::FirstParty);
961 let trusted = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
962 let untrusted = g.add_node(NodeKind::Step, "third-party", TrustZone::Untrusted);
963
964 g.add_edge(trusted, secret, EdgeKind::HasAccessTo);
965 g.add_edge(trusted, untrusted, EdgeKind::DelegatesTo);
966
967 let paths = propagation_analysis(&g, DEFAULT_MAX_HOPS);
968 (g, paths)
969 }
970
971 fn one<T>(v: T) -> Option<OneOrMany<T>> {
972 Some(OneOrMany::One(v))
973 }
974
975 #[test]
976 fn custom_rule_fires_on_matching_path() {
977 let (graph, paths) = build_graph_with_paths();
978
979 let rule = CustomRule {
980 id: "secret_to_untrusted".into(),
981 name: "Secret reaching untrusted step".into(),
982 description: "Custom policy".into(),
983 severity: Severity::Critical,
984 category: FindingCategory::AuthorityPropagation,
985 match_spec: MatchSpec {
986 source: NodeMatcher {
987 node_type: None,
988 trust_zone: one(TrustZone::FirstParty),
989 metadata: MetadataMatcher::default(),
990 not: None,
991 },
992 sink: NodeMatcher {
993 node_type: None,
994 trust_zone: one(TrustZone::Untrusted),
995 metadata: MetadataMatcher::default(),
996 not: None,
997 },
998 path: PathMatcher::default(),
999 graph_metadata: MetadataMatcher::default(),
1000 standalone: None,
1001 },
1002 source_file: None,
1003 };
1004
1005 let findings = evaluate_custom_rules(&graph, &paths, &[rule]);
1006 assert_eq!(findings.len(), 1);
1007 assert_eq!(findings[0].severity, Severity::Critical);
1008 assert!(findings[0].message.contains("secret_to_untrusted"));
1009 }
1010
1011 #[test]
1012 fn custom_rule_does_not_fire_when_predicates_miss() {
1013 let (graph, paths) = build_graph_with_paths();
1014
1015 let rule = CustomRule {
1016 id: "miss".into(),
1017 name: "Untrusted source".into(),
1018 description: String::new(),
1019 severity: Severity::Critical,
1020 category: FindingCategory::AuthorityPropagation,
1021 match_spec: MatchSpec {
1022 source: NodeMatcher {
1023 node_type: None,
1024 trust_zone: one(TrustZone::Untrusted),
1025 metadata: MetadataMatcher::default(),
1026 not: None,
1027 },
1028 sink: NodeMatcher::default(),
1029 path: PathMatcher::default(),
1030 graph_metadata: MetadataMatcher::default(),
1031 standalone: None,
1032 },
1033 source_file: None,
1034 };
1035
1036 let findings = evaluate_custom_rules(&graph, &paths, &[rule]);
1037 assert!(findings.is_empty());
1038 }
1039
1040 #[test]
1041 fn yaml_round_trip_loads_full_rule() {
1042 let yaml = r#"
1043id: my_secret_to_untrusted
1044name: Secret reaching untrusted step
1045description: "Custom policy: secrets must not reach untrusted steps"
1046severity: critical
1047category: authority_propagation
1048match:
1049 source:
1050 node_type: secret
1051 trust_zone: first_party
1052 sink:
1053 node_type: step
1054 trust_zone: untrusted
1055 path:
1056 crosses_to: [untrusted]
1057"#;
1058 let rule: CustomRule = serde_yaml::from_str(yaml).expect("yaml must parse");
1059 assert_eq!(rule.id, "my_secret_to_untrusted");
1060 assert_eq!(rule.severity, Severity::Critical);
1061 assert!(matches!(
1062 rule.match_spec.source.node_type,
1063 Some(OneOrMany::One(NodeKind::Secret))
1064 ));
1065 assert!(matches!(
1066 rule.match_spec.sink.trust_zone,
1067 Some(OneOrMany::One(TrustZone::Untrusted))
1068 ));
1069 assert_eq!(rule.match_spec.path.crosses_to, vec![TrustZone::Untrusted]);
1070 }
1071
1072 #[test]
1073 fn metadata_predicate_must_match_all_keys() {
1074 let mut g = AuthorityGraph::new(source());
1075 let mut meta = HashMap::new();
1076 meta.insert("kind".to_string(), "deploy".to_string());
1077 let secret =
1078 g.add_node_with_metadata(NodeKind::Secret, "TOKEN", TrustZone::FirstParty, meta);
1079 let sink = g.add_node(NodeKind::Step, "remote", TrustZone::Untrusted);
1080 let step = g.add_node(NodeKind::Step, "use", TrustZone::FirstParty);
1081 g.add_edge(step, secret, EdgeKind::HasAccessTo);
1082 g.add_edge(step, sink, EdgeKind::DelegatesTo);
1083
1084 let paths = propagation_analysis(&g, DEFAULT_MAX_HOPS);
1085
1086 let mut want_fields = HashMap::new();
1087 want_fields.insert(
1088 "kind".to_string(),
1089 MetadataPredicate::Equals("deploy".to_string()),
1090 );
1091 let hit = CustomRule {
1092 id: "hit".into(),
1093 name: "n".into(),
1094 description: String::new(),
1095 severity: Severity::High,
1096 category: FindingCategory::AuthorityPropagation,
1097 match_spec: MatchSpec {
1098 source: NodeMatcher {
1099 node_type: one(NodeKind::Secret),
1100 trust_zone: None,
1101 metadata: MetadataMatcher {
1102 fields: want_fields,
1103 not: None,
1104 },
1105 not: None,
1106 },
1107 sink: NodeMatcher::default(),
1108 path: PathMatcher::default(),
1109 graph_metadata: MetadataMatcher::default(),
1110 standalone: None,
1111 },
1112 source_file: None,
1113 };
1114 assert_eq!(evaluate_custom_rules(&g, &paths, &[hit]).len(), 1);
1115
1116 let mut wrong_fields = HashMap::new();
1117 wrong_fields.insert(
1118 "kind".to_string(),
1119 MetadataPredicate::Equals("build".to_string()),
1120 );
1121 let miss = CustomRule {
1122 id: "miss".into(),
1123 name: "n".into(),
1124 description: String::new(),
1125 severity: Severity::High,
1126 category: FindingCategory::AuthorityPropagation,
1127 match_spec: MatchSpec {
1128 source: NodeMatcher {
1129 node_type: one(NodeKind::Secret),
1130 trust_zone: None,
1131 metadata: MetadataMatcher {
1132 fields: wrong_fields,
1133 not: None,
1134 },
1135 not: None,
1136 },
1137 sink: NodeMatcher::default(),
1138 path: PathMatcher::default(),
1139 graph_metadata: MetadataMatcher::default(),
1140 standalone: None,
1141 },
1142 source_file: None,
1143 };
1144 assert!(evaluate_custom_rules(&g, &paths, &[miss]).is_empty());
1145 }
1146
1147 #[test]
1148 fn load_rules_dir_reads_yml_and_yaml() {
1149 let tmp = std::env::temp_dir().join(format!("taudit-custom-rules-{}", std::process::id()));
1150 fs::create_dir_all(&tmp).unwrap();
1151 let yml_path = tmp.join("a.yml");
1152 let yaml_path = tmp.join("b.yaml");
1153 let other_path = tmp.join("c.txt");
1154
1155 fs::write(
1156 &yml_path,
1157 "id: a\nname: a\nseverity: high\ncategory: authority_propagation\n",
1158 )
1159 .unwrap();
1160 fs::write(
1161 &yaml_path,
1162 "id: b\nname: b\nseverity: medium\ncategory: unpinned_action\n",
1163 )
1164 .unwrap();
1165 fs::write(&other_path, "ignored").unwrap();
1166
1167 let rules = load_rules_dir(&tmp).expect("load must succeed");
1168 assert_eq!(rules.len(), 2);
1169 assert_eq!(rules[0].id, "a");
1170 assert_eq!(rules[1].id, "b");
1171
1172 let _ = fs::remove_dir_all(&tmp);
1174 }
1175
1176 #[test]
1177 fn load_rules_dir_reports_yaml_errors_with_path() {
1178 let tmp =
1179 std::env::temp_dir().join(format!("taudit-custom-rules-bad-{}", std::process::id()));
1180 fs::create_dir_all(&tmp).unwrap();
1181 let bad = tmp.join("bad.yml");
1182 fs::write(&bad, "id: x\nseverity: not-a-real-severity\n").unwrap();
1183
1184 let errs = load_rules_dir(&tmp).expect_err("should fail");
1185 assert_eq!(errs.len(), 1);
1186 let msg = errs[0].to_string();
1187 assert!(msg.contains("bad.yml"), "error must mention path: {msg}");
1188
1189 let _ = fs::remove_dir_all(&tmp);
1190 }
1191
1192 #[test]
1200 fn reserved_categories_rejected_by_custom_rule_loader() {
1201 let tmp = std::env::temp_dir().join(format!(
1202 "taudit-custom-rules-reserved-{}",
1203 std::process::id()
1204 ));
1205 fs::create_dir_all(&tmp).unwrap();
1206 let reserved_path = tmp.join("reserved.yml");
1207 fs::write(
1208 &reserved_path,
1209 "id: r\nname: r\nseverity: high\ncategory: egress_blindspot\n",
1210 )
1211 .unwrap();
1212
1213 let errs = load_rules_dir(&tmp).expect_err("reserved category must be rejected");
1214 assert_eq!(errs.len(), 1);
1215 let msg = errs[0].to_string();
1216 assert!(
1217 msg.contains("unknown variant") && msg.contains("egress_blindspot"),
1218 "expected an `unknown variant `egress_blindspot`` error, got: {msg}"
1219 );
1220
1221 let other_path = tmp.join("reserved2.yml");
1223 fs::write(
1224 &other_path,
1225 "id: r2\nname: r2\nseverity: high\ncategory: missing_audit_trail\n",
1226 )
1227 .unwrap();
1228 let errs2 = load_rules_dir(&tmp).expect_err("second reserved category must be rejected");
1229 assert!(errs2.iter().any(|e| {
1231 let m = e.to_string();
1232 m.contains("unknown variant") && m.contains("missing_audit_trail")
1233 }));
1234
1235 let _ = fs::remove_dir_all(&tmp);
1236 }
1237
1238 #[test]
1244 fn reserved_categories_still_serialize_when_constructed_in_rust() {
1245 let f = Finding {
1246 severity: Severity::Medium,
1247 category: FindingCategory::EgressBlindspot,
1248 path: None,
1249 nodes_involved: vec![],
1250 message: "runtime-enriched".into(),
1251 recommendation: Recommendation::Manual {
1252 action: "investigate".into(),
1253 },
1254 source: FindingSource::BuiltIn,
1255 extras: FindingExtras::default(),
1256 };
1257 let json = serde_json::to_value(&f).expect("serialises fine");
1258 assert_eq!(json["category"], "egress_blindspot");
1259
1260 let g = Finding {
1261 category: FindingCategory::MissingAuditTrail,
1262 ..f
1263 };
1264 let json2 = serde_json::to_value(&g).expect("serialises fine");
1265 assert_eq!(json2["category"], "missing_audit_trail");
1266 }
1267
1268 fn simple_first_to_untrusted_graph() -> (AuthorityGraph, Vec<PropagationPath>) {
1273 let mut g = AuthorityGraph::new(source());
1274 let mut meta = HashMap::new();
1275 meta.insert("oidc".to_string(), "true".to_string());
1276 meta.insert("permissions".to_string(), "contents: write".to_string());
1277 meta.insert("role".to_string(), "admin".to_string());
1278 let secret =
1279 g.add_node_with_metadata(NodeKind::Identity, "GH_TOKEN", TrustZone::FirstParty, meta);
1280 let step = g.add_node(NodeKind::Step, "use-it", TrustZone::FirstParty);
1281 let untrusted = g.add_node(NodeKind::Step, "third-party", TrustZone::Untrusted);
1282 g.add_edge(step, secret, EdgeKind::HasAccessTo);
1283 g.add_edge(step, untrusted, EdgeKind::DelegatesTo);
1284 let paths = propagation_analysis(&g, DEFAULT_MAX_HOPS);
1285 (g, paths)
1286 }
1287
1288 #[test]
1289 fn negation_on_trust_zone_inverts_match() {
1290 let (graph, paths) = simple_first_to_untrusted_graph();
1291 let yaml = r#"
1293id: r
1294name: r
1295severity: high
1296category: authority_propagation
1297match:
1298 sink:
1299 not:
1300 trust_zone: untrusted
1301"#;
1302 let rule: CustomRule = serde_yaml::from_str(yaml).expect("yaml parses");
1303 assert!(evaluate_custom_rules(&graph, &paths, &[rule]).is_empty());
1304 }
1305
/// Negating a `node_type` list on the source: excluding `[secret, identity]`
/// yields no findings on the fixture, while excluding only `[step]` yields
/// at least one.
#[test]
fn negation_on_node_type_list_matches_other_kinds() {
    let (graph, paths) = simple_first_to_untrusted_graph();

    // Case 1: the negated list covers the fixture's source kind.
    let excluded_yaml = r#"
id: r
name: r
severity: high
category: authority_propagation
match:
  source:
    not:
      node_type: [secret, identity]
"#;
    let excluded: CustomRule = serde_yaml::from_str(excluded_yaml).expect("yaml parses");
    let no_hits = evaluate_custom_rules(&graph, &paths, &[excluded]);
    assert!(no_hits.is_empty());

    // Case 2: the negated list names a different kind.
    let allowed_yaml = r#"
id: r2
name: r2
severity: high
category: authority_propagation
match:
  source:
    not:
      node_type: [step]
"#;
    let allowed: CustomRule = serde_yaml::from_str(allowed_yaml).expect("yaml parses");
    let hits = evaluate_custom_rules(&graph, &paths, &[allowed]);
    assert!(!hits.is_empty());
}
1339
/// A `metadata: not: { oidc: "true" }` source predicate must yield no
/// findings on the fixture, whose identity node carries `oidc = "true"`.
///
/// Only this non-matching direction is asserted here.
#[test]
fn metadata_negation_matches_absent_or_other_value() {
    let rule_yaml = r#"
id: r
name: r
severity: high
category: authority_propagation
match:
  source:
    metadata:
      not:
        oidc: "true"
"#;
    let parsed: CustomRule = serde_yaml::from_str(rule_yaml).expect("yaml parses");
    let (graph, paths) = simple_first_to_untrusted_graph();
    let findings = evaluate_custom_rules(&graph, &paths, &[parsed]);
    assert!(findings.is_empty());
}
1359
/// The `contains` metadata operator: `"contents: write"` produces exactly
/// one finding against the fixture's `permissions` value, while
/// `"actions: write"` produces none.
#[test]
fn metadata_contains_does_substring_match() {
    let (graph, paths) = simple_first_to_untrusted_graph();

    let hit_yaml = r#"
id: r
name: r
severity: high
category: authority_propagation
match:
  source:
    metadata:
      permissions:
        contains: "contents: write"
"#;
    let hit_rule: CustomRule = serde_yaml::from_str(hit_yaml).expect("yaml parses");
    let hits = evaluate_custom_rules(&graph, &paths, &[hit_rule]);
    assert_eq!(hits.len(), 1);

    let miss_yaml = r#"
id: r
name: r
severity: high
category: authority_propagation
match:
  source:
    metadata:
      permissions:
        contains: "actions: write"
"#;
    let miss_rule: CustomRule = serde_yaml::from_str(miss_yaml).expect("yaml parses");
    let misses = evaluate_custom_rules(&graph, &paths, &[miss_rule]);
    assert!(misses.is_empty());
}
1392
/// The `in` metadata operator: a list containing the fixture's `role`
/// value (`admin`) produces exactly one finding; a list without it
/// produces none.
#[test]
fn metadata_in_matches_any_of_allowed_values() {
    let (graph, paths) = simple_first_to_untrusted_graph();

    let hit_yaml = r#"
id: r
name: r
severity: high
category: authority_propagation
match:
  source:
    metadata:
      role:
        in: [admin, owner, write]
"#;
    let hit_rule: CustomRule = serde_yaml::from_str(hit_yaml).expect("yaml parses");
    let hits = evaluate_custom_rules(&graph, &paths, &[hit_rule]);
    assert_eq!(hits.len(), 1);

    let miss_yaml = r#"
id: r
name: r
severity: high
category: authority_propagation
match:
  source:
    metadata:
      role:
        in: [reader, none]
"#;
    let miss_rule: CustomRule = serde_yaml::from_str(miss_yaml).expect("yaml parses");
    let misses = evaluate_custom_rules(&graph, &paths, &[miss_rule]);
    assert!(misses.is_empty());
}
1424
/// The `not_equals` metadata operator: `not_equals: admin` produces no
/// findings (the fixture's `role` is `admin`), while `not_equals: reader`
/// produces exactly one.
#[test]
fn metadata_not_equals_excludes_specific_value() {
    let (graph, paths) = simple_first_to_untrusted_graph();

    let same_value_yaml = r#"
id: r
name: r
severity: high
category: authority_propagation
match:
  source:
    metadata:
      role:
        not_equals: admin
"#;
    let same_value: CustomRule = serde_yaml::from_str(same_value_yaml).expect("yaml parses");
    let suppressed = evaluate_custom_rules(&graph, &paths, &[same_value]);
    assert!(suppressed.is_empty());

    let other_value_yaml = r#"
id: r
name: r
severity: high
category: authority_propagation
match:
  source:
    metadata:
      role:
        not_equals: reader
"#;
    let other_value: CustomRule = serde_yaml::from_str(other_value_yaml).expect("yaml parses");
    let reported = evaluate_custom_rules(&graph, &paths, &[other_value]);
    assert_eq!(reported.len(), 1);
}
1457
/// A doubly nested `not:` around `trust_zone: first_party` on the source
/// must still produce at least one finding on the fixture.
#[test]
fn nested_not_collapses_to_inner_condition() {
    let rule_yaml = r#"
id: r
name: r
severity: high
category: authority_propagation
match:
  source:
    not:
      not:
        trust_zone: first_party
"#;
    let parsed: CustomRule = serde_yaml::from_str(rule_yaml).expect("yaml parses");
    let (graph, paths) = simple_first_to_untrusted_graph();
    let findings = evaluate_custom_rules(&graph, &paths, &[parsed]);
    assert!(!findings.is_empty());
}
1477
/// Scalar (non-list) `node_type` / `trust_zone` values and a bare-string
/// metadata value must parse into `OneOrMany::One` and
/// `MetadataPredicate::Equals` respectively, and the resulting rule must
/// produce exactly one finding on the fixture.
#[test]
fn node_type_accepts_single_value_back_compat() {
    let rule_yaml = r#"
id: r
name: r
severity: high
category: authority_propagation
match:
  source:
    node_type: identity
    trust_zone: first_party
    metadata:
      oidc: "true"
"#;
    let parsed: CustomRule = serde_yaml::from_str(rule_yaml).expect("v0.4 form must still parse");

    // Shape checks on the parsed source matcher.
    let source_spec = &parsed.match_spec.source;
    assert!(matches!(
        source_spec.node_type,
        Some(OneOrMany::One(NodeKind::Identity))
    ));
    assert!(matches!(
        source_spec.trust_zone,
        Some(OneOrMany::One(TrustZone::FirstParty))
    ));
    let oidc_predicate = source_spec
        .metadata
        .fields
        .get("oidc")
        .expect("oidc predicate");
    assert!(matches!(oidc_predicate, MetadataPredicate::Equals(v) if v == "true"));

    // Behavioural check against the fixture.
    let (graph, paths) = simple_first_to_untrusted_graph();
    let findings = evaluate_custom_rules(&graph, &paths, &[parsed]);
    assert_eq!(findings.len(), 1);
}
1514
/// List-valued `node_type` and `trust_zone` must both parse into
/// `OneOrMany::Many`, and the resulting rule must produce exactly one
/// finding on the fixture.
///
/// The `trust_zone` shape is asserted as well so this test covers the same
/// two fields as `node_type_accepts_single_value_back_compat`.
#[test]
fn node_type_accepts_list_form() {
    let yaml = r#"
id: r
name: r
severity: high
category: authority_propagation
match:
  source:
    node_type: [secret, identity]
    trust_zone: [first_party, third_party]
"#;
    let rule: CustomRule = serde_yaml::from_str(yaml).expect("list form must parse");

    match &rule.match_spec.source.node_type {
        Some(OneOrMany::Many(v)) => {
            assert_eq!(v, &vec![NodeKind::Secret, NodeKind::Identity]);
        }
        other => panic!("expected list form, got {other:?}"),
    }

    // The YAML also supplies `trust_zone` as a list; verify it parsed as a
    // list with the declared entries in order.
    assert!(
        matches!(
            &rule.match_spec.source.trust_zone,
            Some(OneOrMany::Many(zones))
                if matches!(
                    zones.as_slice(),
                    [TrustZone::FirstParty, TrustZone::ThirdParty]
                )
        ),
        "trust_zone list must parse as OneOrMany::Many([first_party, third_party])"
    );

    let (graph, paths) = simple_first_to_untrusted_graph();
    assert_eq!(evaluate_custom_rules(&graph, &paths, &[rule]).len(), 1);
}
1537
1538 fn pr_context_graph_with_meta(meta: &[(&str, &str)]) -> (AuthorityGraph, Vec<PropagationPath>) {
1543 let mut g = AuthorityGraph::new(source());
1544 let mut secret_meta = HashMap::new();
1545 secret_meta.insert("variable_group".to_string(), "true".to_string());
1546 let secret = g.add_node_with_metadata(
1547 NodeKind::Secret,
1548 "VG_SECRET",
1549 TrustZone::FirstParty,
1550 secret_meta,
1551 );
1552 let step = g.add_node(NodeKind::Step, "use", TrustZone::FirstParty);
1553 let untrusted = g.add_node(NodeKind::Step, "third-party", TrustZone::Untrusted);
1554 g.add_edge(step, secret, crate::graph::EdgeKind::HasAccessTo);
1555 g.add_edge(step, untrusted, crate::graph::EdgeKind::DelegatesTo);
1556 for (k, v) in meta {
1557 g.metadata.insert((*k).to_string(), (*v).to_string());
1558 }
1559 let paths = propagation_analysis(&g, DEFAULT_MAX_HOPS);
1560 (g, paths)
1561 }
1562
/// With graph metadata `trigger = "pr"`, a rule gated on
/// `graph_metadata.trigger equals pr` plus a source metadata match must
/// produce exactly one finding.
#[test]
fn graph_metadata_equals_matches_when_value_present() {
    let rule_yaml = r#"
id: r
name: r
severity: high
category: authority_propagation
match:
  graph_metadata:
    trigger:
      equals: pr
  source:
    metadata:
      variable_group: "true"
"#;
    let parsed: CustomRule = serde_yaml::from_str(rule_yaml).expect("yaml parses");
    let (graph, paths) = pr_context_graph_with_meta(&[("trigger", "pr")]);
    let findings = evaluate_custom_rules(&graph, &paths, &[parsed]);
    assert_eq!(findings.len(), 1);
}
1582
/// With graph metadata `trigger = "merge_request_event"`, a
/// `graph_metadata.trigger in [...]` gate listing that value must produce
/// at least one finding.
#[test]
fn graph_metadata_in_matches_any_of_listed_values() {
    let rule_yaml = r#"
id: r
name: r
severity: high
category: authority_propagation
match:
  graph_metadata:
    trigger:
      in: [pull_request_target, pr, merge_request_event]
"#;
    let parsed: CustomRule = serde_yaml::from_str(rule_yaml).expect("yaml parses");
    let (graph, paths) = pr_context_graph_with_meta(&[("trigger", "merge_request_event")]);
    let findings = evaluate_custom_rules(&graph, &paths, &[parsed]);
    assert!(!findings.is_empty());
}
1599
/// A `graph_metadata: not: { trigger: equals push }` gate must produce no
/// findings on a graph whose trigger is `push`, and at least one on a graph
/// whose trigger is `pr`. The same YAML is parsed once per graph.
#[test]
fn graph_metadata_negation_excludes_unwanted_trigger() {
    let rule_yaml = r#"
id: r
name: r
severity: high
category: authority_propagation
match:
  graph_metadata:
    not:
      trigger:
        equals: push
"#;

    let (push_graph, push_paths) = pr_context_graph_with_meta(&[("trigger", "push")]);
    let push_rule: CustomRule = serde_yaml::from_str(rule_yaml).expect("yaml parses");
    let push_findings = evaluate_custom_rules(&push_graph, &push_paths, &[push_rule]);
    assert!(push_findings.is_empty());

    let (pr_graph, pr_paths) = pr_context_graph_with_meta(&[("trigger", "pr")]);
    let pr_rule: CustomRule = serde_yaml::from_str(rule_yaml).expect("yaml parses");
    let pr_findings = evaluate_custom_rules(&pr_graph, &pr_paths, &[pr_rule]);
    assert!(!pr_findings.is_empty());
}
1623
/// When the graph has no `trigger` metadata key at all, a rule gated on
/// `graph_metadata.trigger equals pr` must evaluate without panicking and
/// return no findings.
#[test]
fn graph_metadata_missing_key_does_not_match_no_crash() {
    let (graph, paths) = pr_context_graph_with_meta(&[]);
    // Precondition: the key really is absent.
    assert!(!graph.metadata.contains_key("trigger"));

    let rule_yaml = r#"
id: r
name: r
severity: high
category: authority_propagation
match:
  graph_metadata:
    trigger:
      equals: pr
"#;
    let parsed: CustomRule = serde_yaml::from_str(rule_yaml).expect("yaml parses");
    let outcome = evaluate_custom_rules(&graph, &paths, &[parsed]);
    assert!(outcome.is_empty(), "missing key must yield no findings");
}
1644
/// A rule with no `graph_metadata` section must still produce exactly one
/// finding even though the graph carries a `trigger` metadata entry.
#[test]
fn rules_without_graph_metadata_remain_backward_compatible() {
    let rule_yaml = r#"
id: r
name: r
severity: high
category: authority_propagation
match:
  source:
    metadata:
      variable_group: "true"
"#;
    let parsed: CustomRule = serde_yaml::from_str(rule_yaml).expect("yaml parses");
    let (graph, paths) = pr_context_graph_with_meta(&[("trigger", "anything")]);
    let findings = evaluate_custom_rules(&graph, &paths, &[parsed]);
    assert_eq!(findings.len(), 1);
}
1663
1664 fn graph_with_image_sink() -> (AuthorityGraph, Vec<PropagationPath>) {
1671 let mut g = AuthorityGraph::new(source());
1672 let identity = g.add_node(NodeKind::Identity, "GH_TOKEN", TrustZone::FirstParty);
1673 let step = g.add_node(NodeKind::Step, "publish", TrustZone::FirstParty);
1674 let image = g.add_node(
1675 NodeKind::Image,
1676 "third-party/deploy@v1",
1677 TrustZone::Untrusted,
1678 );
1679 g.add_edge(step, identity, crate::graph::EdgeKind::HasAccessTo);
1680 g.add_edge(step, image, crate::graph::EdgeKind::UsesImage);
1681 let paths = propagation_analysis(&g, DEFAULT_MAX_HOPS);
1682 (g, paths)
1683 }
1684
/// A sink matcher of `node_type: image` + `trust_zone: untrusted` must
/// produce at least one finding on the image-sink fixture.
#[test]
fn sink_node_type_image_matches_image_path_endpoint() {
    let rule_yaml = r#"
id: r
name: r
severity: high
category: untrusted_with_authority
match:
  sink:
    node_type: image
    trust_zone: untrusted
"#;
    let parsed: CustomRule = serde_yaml::from_str(rule_yaml).expect("yaml parses");
    let (graph, paths) = graph_with_image_sink();
    let reported = evaluate_custom_rules(&graph, &paths, &[parsed]);
    assert!(
        !reported.is_empty(),
        "Image-as-sink must produce at least one finding"
    );
}
1705
/// A `standalone` matcher for image nodes lacking a `sha256:` digest must
/// fire once per such node: the graph holds two images without `digest`
/// metadata and one with it, and exactly two findings are expected. No
/// propagation paths are supplied.
#[test]
fn standalone_matches_every_floating_image_in_graph() {
    let mut graph = AuthorityGraph::new(source());
    let _builder = graph.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
    let _alpine_latest = graph.add_node(NodeKind::Image, "alpine:latest", TrustZone::ThirdParty);
    let _ubuntu_tagged = graph.add_node(NodeKind::Image, "ubuntu:22.04", TrustZone::ThirdParty);

    // The only image carrying a digest.
    let mut digest_meta = HashMap::new();
    digest_meta.insert(String::from("digest"), String::from("sha256:abc"));
    let _alpine_pinned = graph.add_node_with_metadata(
        NodeKind::Image,
        "alpine@sha256:abc",
        TrustZone::ThirdParty,
        digest_meta,
    );

    let no_paths: Vec<PropagationPath> = Vec::new();

    let rule_yaml = r#"
id: floating_image_standalone
name: Floating image
severity: medium
category: unpinned_action
match:
  standalone:
    node_type: image
    not:
      metadata:
        digest:
          contains: "sha256:"
"#;
    let parsed: CustomRule = serde_yaml::from_str(rule_yaml).expect("yaml parses");
    let reported = evaluate_custom_rules(&graph, &no_paths, &[parsed]);
    assert_eq!(
        reported.len(),
        2,
        "standalone must fire once per floating Image node"
    );
}
1745
/// The `in` operator inside a `standalone` matcher: of two image nodes,
/// only the one with `self_hosted = "true"` must be reported.
#[test]
fn standalone_supports_in_operator() {
    let mut graph = AuthorityGraph::new(source());

    let mut pool_meta = HashMap::new();
    pool_meta.insert(String::from("self_hosted"), String::from("true"));
    let _self_pool = graph.add_node_with_metadata(
        NodeKind::Image,
        "self-pool",
        TrustZone::FirstParty,
        pool_meta,
    );
    let _hosted_runner = graph.add_node(NodeKind::Image, "ubuntu-latest", TrustZone::ThirdParty);

    let no_paths: Vec<PropagationPath> = Vec::new();

    let rule_yaml = r#"
id: r
name: r
severity: high
category: authority_propagation
match:
  standalone:
    node_type: image
    metadata:
      self_hosted:
        in: ["true", "yes"]
"#;
    let parsed: CustomRule = serde_yaml::from_str(rule_yaml).expect("yaml parses");
    let reported = evaluate_custom_rules(&graph, &no_paths, &[parsed]);
    assert_eq!(reported.len(), 1, "in:[\"true\",\"yes\"] matches one node");
}
1776
/// A `standalone` rule combined with a `graph_metadata` gate must fire on a
/// graph whose `trigger` is `pr` and stay silent on an otherwise identical
/// graph whose `trigger` is `push`.
#[test]
fn standalone_still_honors_graph_metadata_gate() {
    let mut pr_graph = AuthorityGraph::new(source());
    pr_graph.metadata.insert("trigger".into(), "pr".into());
    pr_graph.add_node(NodeKind::Image, "alpine:latest", TrustZone::ThirdParty);

    let mut push_graph = AuthorityGraph::new(source());
    push_graph.metadata.insert("trigger".into(), "push".into());
    push_graph.add_node(NodeKind::Image, "alpine:latest", TrustZone::ThirdParty);

    let rule_yaml = r#"
id: r
name: r
severity: low
category: unpinned_action
match:
  graph_metadata:
    trigger:
      equals: pr
  standalone:
    node_type: image
"#;
    let parsed: CustomRule = serde_yaml::from_str(rule_yaml).expect("yaml parses");

    let on_pr = evaluate_custom_rules(&pr_graph, &[], std::slice::from_ref(&parsed));
    assert_eq!(on_pr.len(), 1, "fires on PR graph");

    let on_push = evaluate_custom_rules(&push_graph, &[], std::slice::from_ref(&parsed));
    assert!(
        on_push.is_empty(),
        "graph_metadata gate must suppress on push graph"
    );
}
1812
/// When a rule declares both `source` and `standalone`, the single image
/// node must still be reported once even though the graph contains no
/// secret node for the `source` matcher and no paths are supplied.
#[test]
fn standalone_ignores_source_sink_path_fields() {
    let mut graph = AuthorityGraph::new(source());
    let _only_image = graph.add_node(NodeKind::Image, "alpine:latest", TrustZone::ThirdParty);
    let no_paths: Vec<PropagationPath> = Vec::new();

    let rule_yaml = r#"
id: r
name: r
severity: low
category: unpinned_action
match:
  source:
    node_type: secret  # would never match anything in this graph
  standalone:
    node_type: image
"#;
    let parsed: CustomRule = serde_yaml::from_str(rule_yaml).expect("yaml parses");
    let reported = evaluate_custom_rules(&graph, &no_paths, &[parsed]);
    assert_eq!(reported.len(), 1);
}
1836
/// A YAML stream with three `---`-separated documents must be parsed by
/// `parse_rules_multi_doc` into three rules, in document order, with
/// per-document fields (here the second rule's severity) preserved.
#[test]
fn multi_doc_yaml_loads_each_document_as_separate_rule() {
    let stream = r#"
id: rule_a
name: First rule
severity: high
category: authority_propagation
match:
  source:
    node_type: secret
---
id: rule_b
name: Second rule
severity: critical
category: untrusted_with_authority
match:
  sink:
    trust_zone: untrusted
---
id: rule_c
name: Third rule
severity: medium
category: unpinned_action
"#;
    let parsed = parse_rules_multi_doc(stream).expect("multi-doc must parse");
    assert_eq!(parsed.len(), 3, "expected 3 rules from 3-doc YAML");

    // Ids come back in document order.
    assert_eq!(parsed[0].id, "rule_a");
    assert_eq!(parsed[1].id, "rule_b");
    assert_eq!(parsed[2].id, "rule_c");
    assert_eq!(parsed[1].severity, Severity::Critical);
}
1870
/// A YAML input with a single document must yield exactly one rule from
/// `parse_rules_multi_doc`.
#[test]
fn single_doc_yaml_still_loads_identically() {
    let stream = r#"
id: solo
name: Solo rule
severity: high
category: authority_propagation
"#;
    let parsed = parse_rules_multi_doc(stream).expect("single-doc must parse");
    assert_eq!(parsed.len(), 1);
    assert_eq!(parsed[0].id, "solo");
}
1883
/// An input that opens with two consecutive `---` markers must still yield
/// only the one rule that follows them.
#[test]
fn multi_doc_with_empty_leading_document_is_skipped() {
    let stream = r#"---
---
id: only
name: only
severity: low
category: authority_propagation
"#;
    let parsed = parse_rules_multi_doc(stream).expect("must parse");
    assert_eq!(parsed.len(), 1);
    assert_eq!(parsed[0].id, "only");
}
1897
/// Writes one three-document `bundle.yml` into a per-process temp directory
/// and checks that `load_rules_dir` returns three rules from it. The
/// directory is removed (best effort) at the end.
#[test]
fn load_rules_dir_loads_multi_doc_files() {
    let dir_name = format!("taudit-custom-rules-multi-{}", std::process::id());
    let rules_dir = std::env::temp_dir().join(dir_name);
    fs::create_dir_all(&rules_dir).unwrap();

    let bundle_path = rules_dir.join("bundle.yml");
    let bundle_yaml = r#"
id: a
name: a
severity: high
category: authority_propagation
---
id: b
name: b
severity: medium
category: unpinned_action
---
id: c
name: c
severity: low
category: authority_propagation
"#;
    fs::write(&bundle_path, bundle_yaml).unwrap();

    let loaded = load_rules_dir(&rules_dir).expect("multi-doc file must load");
    assert_eq!(loaded.len(), 3, "expected 3 rules from one bundled file");

    // Best-effort cleanup; a failure here is not a test failure.
    let _ = fs::remove_dir_all(&rules_dir);
}
1930
/// A rule loaded from disk must record the file it came from in
/// `source_file`, and findings produced by it must carry that same path in
/// `FindingSource::Custom`.
#[test]
fn loaded_rule_threads_source_file_into_findings() {
    let dir_name = format!("taudit-custom-prov-{}", std::process::id());
    let rules_dir = std::env::temp_dir().join(dir_name);
    fs::create_dir_all(&rules_dir).unwrap();

    let rule_path = rules_dir.join("provenance.yml");
    let rule_yaml = r#"
id: from_disk
name: From disk
description: planted invariant
severity: critical
category: authority_propagation
match:
  source:
    trust_zone: first_party
  sink:
    trust_zone: untrusted
"#;
    fs::write(&rule_path, rule_yaml).unwrap();

    // The loaded rule remembers its origin.
    let loaded = load_rules_dir(&rules_dir).expect("rules load");
    assert_eq!(loaded.len(), 1);
    assert_eq!(loaded[0].source_file.as_deref(), Some(rule_path.as_path()));

    // The origin is carried through to the finding.
    let (graph, paths) = build_graph_with_paths();
    let reported = evaluate_custom_rules(&graph, &paths, &loaded);
    assert_eq!(reported.len(), 1);
    let provenance = &reported[0].source;
    match provenance {
        FindingSource::Custom { source_file } => {
            assert_eq!(
                source_file, &rule_path,
                "custom finding must carry the YAML path it was loaded from"
            );
        }
        other => panic!("expected FindingSource::Custom, got {other:?}"),
    }

    // Best-effort cleanup; a failure here is not a test failure.
    let _ = fs::remove_dir_all(&rules_dir);
}
1975
/// A `CustomRule` constructed in code with `source_file: None` must still
/// produce findings, and each finding's source must be
/// `FindingSource::Custom` with an empty path.
#[test]
fn in_memory_custom_rule_emits_custom_source_with_empty_path() {
    let in_memory = CustomRule {
        id: "in_mem".into(),
        name: "in-memory".into(),
        description: String::new(),
        severity: Severity::High,
        category: FindingCategory::AuthorityPropagation,
        match_spec: MatchSpec::default(),
        source_file: None,
    };

    let (graph, paths) = build_graph_with_paths();
    let reported = evaluate_custom_rules(&graph, &paths, &[in_memory]);
    assert!(!reported.is_empty(), "in-mem rule must still match");

    for finding in &reported {
        match &finding.source {
            FindingSource::Custom { source_file } => {
                assert!(
                    source_file.as_os_str().is_empty(),
                    "in-mem custom rule emits Custom with empty path, not BuiltIn"
                );
            }
            other => {
                panic!("in-memory custom rule must still produce Custom source, got {other:?}")
            }
        }
    }
}
2008
/// A metadata predicate using an operator key (`starts_with`) that the
/// other tests never use must fail to deserialize, with an error message
/// mentioning either `metadata` or `variant`.
#[test]
fn unknown_metadata_operator_is_rejected() {
    let rule_yaml = r#"
id: r
name: r
severity: high
category: authority_propagation
match:
  source:
    metadata:
      role:
        starts_with: adm
"#;
    let outcome: Result<CustomRule, _> = serde_yaml::from_str(rule_yaml);
    let msg = outcome
        .expect_err("unknown operator must be rejected")
        .to_string();
    let is_meaningful = msg.contains("metadata") || msg.contains("variant");
    assert!(
        is_meaningful,
        "parse should fail with a meaningful location: {msg}"
    );
}
2033
/// Returns a fresh path under the system temp directory of the form
/// `taudit-symlink-{prefix}-{pid}-{n}`, where `n` comes from a
/// process-wide counter that increases on every call, so repeated calls
/// with the same prefix never collide within one process.
///
/// The directory itself is not created.
#[cfg(unix)]
fn unique_tmp(prefix: &str) -> PathBuf {
    use std::sync::atomic::{AtomicU64, Ordering};
    static COUNTER: AtomicU64 = AtomicU64::new(0);

    let seq = COUNTER.fetch_add(1, Ordering::SeqCst);
    let pid = std::process::id();
    let dir_name = format!("taudit-symlink-{prefix}-{pid}-{seq}");
    std::env::temp_dir().join(dir_name)
}
2049
/// Writes a four-line rule document (`id`, `name`, `severity: high`,
/// `category: authority_propagation`) to `path`, using `id` for both the
/// id and the name. Panics if the write fails.
#[cfg(unix)]
fn write_minimal_rule(path: &Path, id: &str) {
    let contents =
        format!("id: {id}\nname: {id}\nseverity: high\ncategory: authority_propagation\n");
    fs::write(path, contents).unwrap();
}
2058
/// A symlink whose target lives inside the rules directory must not make
/// `load_rules_dir` fail, and the rule must be returned once (not once per
/// directory entry).
///
/// Only the rule count and id are asserted here; no warning output is
/// inspected by this test.
#[test]
#[cfg(unix)]
fn load_rules_dir_follows_in_tree_symlink_with_warning() {
    use std::os::unix::fs::symlink;

    let rules_dir = unique_tmp("intree");
    fs::create_dir_all(&rules_dir).unwrap();

    let target = rules_dir.join("real.yml");
    write_minimal_rule(&target, "in_tree");
    let alias = rules_dir.join("alias.yml");
    symlink(&target, &alias).unwrap();

    let rules = load_rules_dir(&rules_dir).expect("in-tree symlink must be loaded");
    assert_eq!(
        rules.len(),
        1,
        "expected 1 rule (alias deduped against real target), got {rules:?}"
    );
    assert_eq!(rules[0].id, "in_tree");

    // Best-effort cleanup; a failure here is not a test failure.
    let _ = fs::remove_dir_all(&rules_dir);
}
2088
/// A symlink inside the rules directory that points at a file in a
/// different directory must make `load_rules_dir` fail with exactly one
/// `CustomRuleError::SymlinkOutsideDir`, whose message names both the link
/// and its target.
#[test]
#[cfg(unix)]
fn load_rules_dir_refuses_out_of_tree_symlink_by_default() {
    use std::os::unix::fs::symlink;

    let rules_dir = unique_tmp("outoftree-refuse");
    fs::create_dir_all(&rules_dir).unwrap();

    // The symlink target lives in a sibling temp directory.
    let foreign_dir = unique_tmp("outoftree-target");
    fs::create_dir_all(&foreign_dir).unwrap();
    let foreign_file = foreign_dir.join("evil.yml");
    write_minimal_rule(&foreign_file, "evil");

    let alias = rules_dir.join("legit.yml");
    symlink(&foreign_file, &alias).unwrap();

    let errs = load_rules_dir(&rules_dir).expect_err("out-of-tree symlink must be refused");
    assert_eq!(errs.len(), 1);

    let first = &errs[0];
    assert!(
        matches!(first, CustomRuleError::SymlinkOutsideDir { .. }),
        "expected SymlinkOutsideDir, got {:?}",
        first
    );
    let msg = first.to_string();
    let names_both = msg.contains("legit.yml") && msg.contains("evil.yml");
    assert!(
        names_both,
        "error should name both link and target: {msg}"
    );

    // Best-effort cleanup; a failure here is not a test failure.
    let _ = fs::remove_dir_all(&rules_dir);
    let _ = fs::remove_dir_all(&foreign_dir);
}
2121
/// With the second argument of `load_rules_dir_with_opts` set to `true`, a
/// symlink pointing outside the rules directory must be followed and its
/// rule loaded.
#[test]
#[cfg(unix)]
fn load_rules_dir_follows_out_of_tree_symlink_with_override() {
    use std::os::unix::fs::symlink;

    let rules_dir = unique_tmp("outoftree-override");
    fs::create_dir_all(&rules_dir).unwrap();

    // The symlink target lives in a sibling temp directory.
    let foreign_dir = unique_tmp("outoftree-target-override");
    fs::create_dir_all(&foreign_dir).unwrap();
    let foreign_file = foreign_dir.join("external.yml");
    write_minimal_rule(&foreign_file, "external");

    let alias = rules_dir.join("aliased.yml");
    symlink(&foreign_file, &alias).unwrap();

    let loaded = load_rules_dir_with_opts(&rules_dir, true)
        .expect("override flag must allow external symlinks");
    assert_eq!(loaded.len(), 1);
    assert_eq!(loaded[0].id, "external");

    // Best-effort cleanup; a failure here is not a test failure.
    let _ = fs::remove_dir_all(&rules_dir);
    let _ = fs::remove_dir_all(&foreign_dir);
}
2146
/// A rule file placed in a subdirectory (`sub/rule.yml`) of the rules
/// directory must be found and loaded by `load_rules_dir`.
#[test]
fn load_rules_dir_walks_subdirectories() {
    let dir_name = format!(
        "taudit-custom-rules-recursive-{}",
        std::process::id()
    );
    let rules_dir = std::env::temp_dir().join(dir_name);
    let nested_dir = rules_dir.join("sub");
    fs::create_dir_all(&nested_dir).unwrap();

    let nested_rule = nested_dir.join("rule.yml");
    let rule_yaml = "id: nested\nname: nested\nseverity: high\ncategory: authority_propagation\n";
    fs::write(&nested_rule, rule_yaml).unwrap();

    let rules = load_rules_dir(&rules_dir).expect("recursive walk must load nested rule");
    assert_eq!(
        rules.len(),
        1,
        "expected 1 rule from nested dir, got {rules:?}"
    );
    assert_eq!(rules[0].id, "nested");

    // Best-effort cleanup; a failure here is not a test failure.
    let _ = fs::remove_dir_all(&rules_dir);
}
2179
/// A real rule file plus an in-tree symlink to it are two directory
/// entries, but `load_rules_dir` must return the rule only once.
#[test]
#[cfg(unix)]
fn load_rules_dir_dedupes_in_tree_symlink() {
    use std::os::unix::fs::symlink;

    let rules_dir = unique_tmp("dedup");
    fs::create_dir_all(&rules_dir).unwrap();

    let target = rules_dir.join("real.yml");
    write_minimal_rule(&target, "dedup_target");
    let link = rules_dir.join("alias.yml");
    symlink(&target, &link).unwrap();

    let rules = load_rules_dir(&rules_dir).expect("alias dedup must succeed");
    assert_eq!(rules.len(), 1, "expected 1 rule after dedup, got {rules:?}");
    assert_eq!(rules[0].id, "dedup_target");

    // Best-effort cleanup; a failure here is not a test failure.
    let _ = fs::remove_dir_all(&rules_dir);
}
2205
/// An empty `id` must be rejected at deserialization time with a message
/// containing `non-empty`.
#[test]
fn custom_rule_id_validation_rejects_empty() {
    let rule_yaml = r#"
id: ""
name: x
severity: high
category: authority_propagation
"#;
    let outcome: Result<CustomRule, _> = serde_yaml::from_str(rule_yaml);
    let msg = outcome
        .expect_err("empty id must be rejected")
        .to_string();
    assert!(
        msg.contains("non-empty"),
        "error must explain why empty fails: {msg}"
    );
}
2223
/// An `id` containing brackets and a space (`foo] [bar`) must be rejected;
/// the error must echo the id, say `invalid character`, and quote one of
/// the offending characters.
#[test]
fn custom_rule_id_validation_rejects_brackets() {
    let rule_yaml = r#"
id: "foo] [bar"
name: x
severity: high
category: authority_propagation
"#;
    let outcome: Result<CustomRule, _> = serde_yaml::from_str(rule_yaml);
    let msg = outcome
        .expect_err("bracket in id must be rejected")
        .to_string();

    let names_id_and_reason = msg.contains("foo] [bar") && msg.contains("invalid character");
    assert!(
        names_id_and_reason,
        "error must name the offending id and the invalid character: {msg}"
    );

    // Any one of the three disallowed characters may be the quoted one.
    let quotes_offender = ["']'", "'['", "' '"]
        .iter()
        .any(|quoted| msg.contains(quoted));
    assert!(
        quotes_offender,
        "error should quote the first offending character: {msg}"
    );
}
2246
/// An `id` containing a space (`foo bar`) must be rejected with a message
/// that echoes the id and says `invalid character`.
#[test]
fn custom_rule_id_validation_rejects_whitespace() {
    let rule_yaml = r#"
id: "foo bar"
name: x
severity: high
category: authority_propagation
"#;
    let outcome: Result<CustomRule, _> = serde_yaml::from_str(rule_yaml);
    let msg = outcome
        .expect_err("whitespace in id must be rejected")
        .to_string();
    let names_id_and_reason = msg.contains("foo bar") && msg.contains("invalid character");
    assert!(
        names_id_and_reason,
        "error must name the offending id and explain why: {msg}"
    );
}
2263
/// An underscore-separated `id` (`my_rule`) must deserialize unchanged.
#[test]
fn custom_rule_id_validation_accepts_snake_case() {
    let rule_yaml = r#"
id: my_rule
name: snake-case rule
severity: high
category: authority_propagation
"#;
    let outcome: Result<CustomRule, _> = serde_yaml::from_str(rule_yaml);
    let parsed = outcome.expect("snake_case id must be accepted");
    assert_eq!(parsed.id, "my_rule");
}
2275
/// A hyphen-separated `id` (`my-rule`) must deserialize unchanged.
#[test]
fn custom_rule_id_validation_accepts_kebab_case() {
    let rule_yaml = r#"
id: my-rule
name: kebab-case rule
severity: high
category: authority_propagation
"#;
    let outcome: Result<CustomRule, _> = serde_yaml::from_str(rule_yaml);
    let parsed = outcome.expect("kebab-case id must be accepted");
    assert_eq!(parsed.id, "my-rule");
}
2287
/// Length boundary for rule ids: 65 characters must be rejected with a
/// message citing `64 characters`, while exactly 64 characters must be
/// accepted.
#[test]
fn custom_rule_id_validation_rejects_64_chars_plus_one() {
    // One past the limit.
    let too_long = "a".repeat(65);
    let too_long_yaml =
        format!("id: {too_long}\nname: x\nseverity: high\ncategory: authority_propagation\n");
    let rejected: Result<CustomRule, _> = serde_yaml::from_str(&too_long_yaml);
    let msg = rejected
        .expect_err("65-char id must be rejected")
        .to_string();
    assert!(
        msg.contains("64 characters"),
        "error must cite the 64-char cap: {msg}"
    );

    // Exactly at the limit.
    let at_limit = "a".repeat(64);
    let at_limit_yaml =
        format!("id: {at_limit}\nname: x\nseverity: high\ncategory: authority_propagation\n");
    let accepted: CustomRule = serde_yaml::from_str(&at_limit_yaml)
        .expect("64-char id must be accepted (boundary case)");
    assert_eq!(accepted.id.len(), 64);
}
2308
/// An `id` whose first character is a digit (`1bad`) must be rejected with
/// a message containing `must start`.
#[test]
fn custom_rule_id_validation_rejects_leading_digit() {
    let rule_yaml = r#"
id: 1bad
name: x
severity: high
category: authority_propagation
"#;
    let outcome: Result<CustomRule, _> = serde_yaml::from_str(rule_yaml);
    let msg = outcome
        .expect_err("digit-leading id must be rejected")
        .to_string();
    assert!(
        msg.contains("must start"),
        "error must explain the first-char rule: {msg}"
    );
}
2327}