1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
use super::summarization::{sink_summary, source_summary};
use super::utils::{
artifact_kind_for_node, artifact_paths, build_sibling_clusters, node_has_sink, node_has_source,
};
use super::ArtifactTaintRuleGroup;
use crate::artifact_graph::ArtifactGraph;
use crate::findings::{EvidenceKind, Finding, MatchTarget};
use std::collections::BTreeSet;
/// Builds single-node taint findings.
///
/// Every path returned by `artifact_paths(graph)` is checked against every
/// rule group. When the node exposes both the group's source and the group's
/// sink, one [`Finding`] is produced per rule in the group. Each finding:
///
/// * is tagged with `EvidenceKind::Behavior`,
/// * names the node as both the artifact path and the `ReferencedFile`
///   match target,
/// * carries `family=… source=… sink=…` as its match value, built from the
///   rule family and the node's source/sink summaries.
///
/// Findings are returned in node order, then group order, then rule order.
pub(super) fn derive_per_node_taint_findings(
    graph: &ArtifactGraph,
    groups: &[ArtifactTaintRuleGroup],
) -> Vec<Finding> {
    let mut collected = Vec::new();
    let paths = artifact_paths(graph);

    for path in &paths {
        for group in groups {
            // Only nodes that expose the source *and* the sink qualify; the
            // sink probe is skipped when the source probe already failed.
            let exposes_both = node_has_source(graph, path, group.source)
                && node_has_sink(graph, path, group.sink);
            if !exposes_both {
                continue;
            }

            // These depend on the (node, group) pair only, so they are
            // computed once and shared by every rule of the group.
            let source_text = source_summary(graph, path, group.source);
            let sink_text = sink_summary(graph, path, group.sink);
            let node_kind = artifact_kind_for_node(graph, path);

            collected.extend(group.rules.iter().map(|rule| {
                let described = format!(
                    "family={} source={} sink={}",
                    rule.family, source_text, sink_text
                );
                Finding::builder(rule.id.clone(), rule.category)
                    .severity(rule.severity)
                    .confidence(rule.confidence)
                    .action(rule.action)
                    .evidence_kind(EvidenceKind::Behavior)
                    .artifact(node_kind, Some(path.clone()))
                    .matched_on(MatchTarget::ReferencedFile { path: path.clone() })
                    .match_value(described)
                    .reason(rule.reason.clone())
                    .build()
            }));
        }
    }

    collected
}
/// Builds taint findings that span two different nodes of one sibling cluster.
///
/// For every cluster returned by `build_sibling_clusters(graph)` with at least
/// two members, and for every rule group, each (source node, sink node) pair
/// with `source != sink` yields one [`Finding`] per rule in the group. The
/// same-node case is left to the per-node pass.
///
/// Each finding uses `rule.confidence * 0.9`, is attributed to the *source*
/// node (both artifact path and `ReferencedFile` match target), and records
/// the relationship as `family=… source=… sink=…` in its match value.
///
/// Two budgets bound the output:
///
/// * a per-group budget of `MAX_CROSS_NODE_FINDINGS_PER_CLUSTER / groups.len()`
///   (at least 1), applied per cluster and per group;
/// * a global cap of `MAX_CROSS_NODE_FINDINGS_TOTAL` across all clusters.
///
/// In debug builds the function asserts that the number of groups does not
/// exceed the per-cluster budget.
pub(super) fn derive_cross_node_taint_findings(
    graph: &ArtifactGraph,
    groups: &[ArtifactTaintRuleGroup],
) -> Vec<Finding> {
    // Cap per-cluster findings to avoid quadratic explosion when a parent
    // references many children that each expose sources and sinks.
    const MAX_CROSS_NODE_FINDINGS_PER_CLUSTER: usize = 50;
    // Global cap across all clusters: without it, the per-cluster allowance
    // multiplied by the number of sibling clusters can far exceed the
    // per-cluster constant. Monorepo-style packages with many parent-child
    // relationships are the typical trigger.
    const MAX_CROSS_NODE_FINDINGS_TOTAL: usize = 100;

    let sibling_clusters = build_sibling_clusters(graph);

    // Divide the budget across groups so every source-sink family gets
    // representation, even when a high-volume group would otherwise exhaust
    // the entire budget.
    debug_assert!(
        groups.len() <= MAX_CROSS_NODE_FINDINGS_PER_CLUSTER,
        "Number of taint rule groups ({}) exceeds per-cluster budget ({}); each group will be capped to 1 finding",
        groups.len(),
        MAX_CROSS_NODE_FINDINGS_PER_CLUSTER
    );
    let per_group_budget = if groups.is_empty() {
        0
    } else {
        (MAX_CROSS_NODE_FINDINGS_PER_CLUSTER / groups.len()).max(1)
    };

    let mut findings = Vec::new();
    for cluster in &sibling_clusters {
        if cluster.len() < 2 {
            continue;
        }
        if findings.len() >= MAX_CROSS_NODE_FINDINGS_TOTAL {
            break;
        }

        for group in groups {
            // Once the global cap is reached no further finding can be
            // emitted, so stop before scanning the cluster for the
            // remaining groups.
            if findings.len() >= MAX_CROSS_NODE_FINDINGS_TOTAL {
                break;
            }
            // A group without rules can never emit anything; skip the
            // source x sink walk entirely.
            if group.rules.is_empty() {
                continue;
            }

            let source_nodes: Vec<&String> = cluster
                .iter()
                .filter(|path| node_has_source(graph, path, group.source))
                .collect();
            if source_nodes.is_empty() {
                continue;
            }
            let sink_nodes: Vec<&String> = cluster
                .iter()
                .filter(|path| node_has_sink(graph, path, group.sink))
                .collect();
            if sink_nodes.is_empty() {
                continue;
            }

            let mut group_finding_count = 0_usize;
            'group: for source_node in &source_nodes {
                // Both values depend on the source node only, so compute them
                // once per source instead of once per (source, sink) pair.
                // NOTE(review): assumes `source_summary` and
                // `artifact_kind_for_node` are side-effect-free lookups on
                // the graph (they only receive `&ArtifactGraph`) - confirm.
                let src = source_summary(graph, source_node, group.source);
                let kind = artifact_kind_for_node(graph, source_node);

                for sink_node in &sink_nodes {
                    if source_node == sink_node {
                        continue; // already covered by per-node pass
                    }
                    let snk = sink_summary(graph, sink_node, group.sink);

                    for rule in &group.rules {
                        // Check budgets *before* pushing each finding. The
                        // per-group budget prevents a single group from
                        // monopolising the cluster budget; the global cap
                        // keeps the sum over all clusters under the intended
                        // ceiling.
                        if findings.len() >= MAX_CROSS_NODE_FINDINGS_TOTAL
                            || group_finding_count >= per_group_budget
                        {
                            break 'group;
                        }
                        // The artifact path and `matched_on` BOTH point at
                        // the source node, so a finding never references two
                        // distinct files and suppression path-matching (which
                        // keys on the artifact path) stays consistent. The
                        // source/sink relationship is preserved in
                        // `match_value` (`source={src} sink={snk}`).
                        findings.push(
                            Finding::builder(rule.id.clone(), rule.category)
                                .severity(rule.severity)
                                .confidence(rule.confidence * 0.9)
                                .action(rule.action)
                                .evidence_kind(EvidenceKind::Behavior)
                                .artifact(kind, Some((*source_node).clone()))
                                .matched_on(MatchTarget::ReferencedFile {
                                    path: (*source_node).clone(),
                                })
                                .match_value(format!(
                                    "family={} source={} sink={}",
                                    rule.family, src, snk
                                ))
                                .reason(rule.reason.clone())
                                .build(),
                        );
                        group_finding_count += 1;
                    }
                }
            }
        }
    }
    findings
}
// Keeps the `BTreeSet` import referenced so it does not trigger an
// unused-import warning: `build_sibling_clusters` returns
// `Vec<BTreeSet<String>>`, but that type is only ever inferred in this file.
// The constant is evaluated at compile time and has no runtime effect.
const _: usize = std::mem::size_of::<BTreeSet<String>>();