use super::patterns::{
looks_like_external_sink, looks_like_identity_target, looks_like_secret_target,
};
use super::trusted_hosts::{
host_matches_secret_owner, is_documentation_or_reserved_host, is_trusted_api_host,
};
use super::{TaintSinkKind, TaintSourceKind};
use crate::artifact_graph::{ArtifactCapability, ArtifactGraph, ArtifactRelation};
use crate::findings::ArtifactKind;
use std::collections::{BTreeMap, BTreeSet};
/// Returns every distinct node path in `graph`, in ascending string order.
///
/// Paths are funneled through a `BTreeSet`, which drops duplicates and yields
/// its elements sorted.
pub(super) fn artifact_paths(graph: &ArtifactGraph) -> Vec<String> {
    let unique_paths: BTreeSet<String> = graph
        .nodes
        .iter()
        .map(|node| node.path.clone())
        .collect();
    unique_paths.into_iter().collect()
}
/// Looks up the kind of the first node in `graph` whose path equals `path`.
///
/// Falls back to `ArtifactKind::GenericArtifact` when no node matches.
pub(super) fn artifact_kind_for_node(graph: &ArtifactGraph, path: &str) -> ArtifactKind {
    for candidate in &graph.nodes {
        if candidate.path == path {
            return candidate.kind;
        }
    }
    ArtifactKind::GenericArtifact
}
/// Reports whether any node at `node_path` carries a capability fact equal to
/// `capability`.
///
/// Every node whose path matches is considered, not just the first one.
pub(super) fn node_has_capability(
    graph: &ArtifactGraph,
    node_path: &str,
    capability: ArtifactCapability,
) -> bool {
    graph
        .nodes
        .iter()
        .filter(|node| node.path == node_path)
        .flat_map(|node| node.capabilities.iter())
        .any(|fact| fact.capability == capability)
}
/// Reports whether the node at `node_path` acts as a taint source of kind
/// `source`.
///
/// Evidence is either a capability recorded on the node or an edge leaving it:
///
/// * `SecretAccess` — the `SecretAccess` capability, an `AccessesSecrets`
///   edge, or a `Reads` edge whose target `looks_like_secret_target` accepts.
/// * `RemoteDownload` — an outgoing edge accepted by
///   `summarization::is_external_download_edge`.
/// * `FilesystemWrite` — the `FilesystemWrite` capability or a `Writes` edge.
/// * `IdentityAccess` — the `IdentityAccess` capability or a `Reads` edge
///   whose target `looks_like_identity_target` accepts.
pub(super) fn node_has_source(
    graph: &ArtifactGraph,
    node_path: &str,
    source: TaintSourceKind,
) -> bool {
    // All edge-based evidence is restricted to edges leaving `node_path`.
    let mut outgoing = graph.edges.iter().filter(|edge| edge.from == node_path);

    match source {
        TaintSourceKind::SecretAccess => {
            if node_has_capability(graph, node_path, ArtifactCapability::SecretAccess) {
                return true;
            }
            outgoing.any(|edge| match edge.relation {
                ArtifactRelation::AccessesSecrets => true,
                ArtifactRelation::Reads => looks_like_secret_target(&edge.to),
                _ => false,
            })
        }
        TaintSourceKind::RemoteDownload => {
            outgoing.any(|edge| super::summarization::is_external_download_edge(edge))
        }
        TaintSourceKind::FilesystemWrite => {
            if node_has_capability(graph, node_path, ArtifactCapability::FilesystemWrite) {
                return true;
            }
            outgoing.any(|edge| matches!(edge.relation, ArtifactRelation::Writes))
        }
        TaintSourceKind::IdentityAccess => {
            if node_has_capability(graph, node_path, ArtifactCapability::IdentityAccess) {
                return true;
            }
            outgoing.any(|edge| {
                matches!(edge.relation, ArtifactRelation::Reads)
                    && looks_like_identity_target(&edge.to)
            })
        }
    }
}
/// Collects the targets of edges leaving `node_path` that count as secret
/// sources.
///
/// An edge qualifies when it is an `AccessesSecrets` edge, or a `Reads` edge
/// whose target is accepted by `looks_like_secret_target` or
/// `looks_like_identity_target`. The returned set is deduplicated and sorted.
fn node_secret_source_names(graph: &ArtifactGraph, node_path: &str) -> BTreeSet<String> {
    let mut names = BTreeSet::new();
    for edge in &graph.edges {
        if edge.from != node_path {
            continue;
        }
        let is_secret_source = match edge.relation {
            ArtifactRelation::AccessesSecrets => true,
            ArtifactRelation::Reads => {
                looks_like_secret_target(&edge.to) || looks_like_identity_target(&edge.to)
            }
            _ => false,
        };
        if is_secret_source {
            names.insert(edge.to.clone());
        }
    }
    names
}
/// Reports whether the node at `node_path` has at least one real external
/// sink and every such sink is acceptable.
///
/// A "real external sink" is an outgoing `ConnectsTo` edge that
/// `looks_like_external_sink` accepts and whose target is not rejected by
/// `is_documentation_or_reserved_host`. Such a sink is acceptable when its
/// target passes `is_trusted_api_host` or `host_matches_secret_owner` against
/// the node's secret source names.
///
/// Returns `false` when the node has no real external sink at all.
pub(super) fn all_external_sinks_first_party_or_trusted(
    graph: &ArtifactGraph,
    node_path: &str,
) -> bool {
    let secret_names = node_secret_source_names(graph, node_path);

    let real_external_sinks = graph
        .edges
        .iter()
        .filter(|&edge| edge.from == node_path)
        .filter(|&edge| matches!(edge.relation, ArtifactRelation::ConnectsTo))
        .filter(|&edge| looks_like_external_sink(edge))
        .filter(|&edge| !is_documentation_or_reserved_host(&edge.to));

    let mut found_any = false;
    for edge in real_external_sinks {
        found_any = true;
        let acceptable = is_trusted_api_host(&edge.to)
            || host_matches_secret_owner(&edge.to, &secret_names);
        if !acceptable {
            // A single unacceptable destination decides the verdict.
            return false;
        }
    }
    found_any
}
/// Reports whether the node at `node_path` acts as a taint sink of kind
/// `sink`.
///
/// * `ExternalNetwork` — an outgoing `ConnectsTo` edge accepted by
///   `looks_like_external_sink`.
/// * `Execution` — the `ProcessExecution` or `InstallExecution` capability,
///   or an outgoing `Executes` edge.
/// * `Persistence` — the `PersistenceSurface` capability or an outgoing
///   `Persists` edge.
pub(super) fn node_has_sink(graph: &ArtifactGraph, node_path: &str, sink: TaintSinkKind) -> bool {
    // Edge-based evidence only ever concerns edges leaving `node_path`.
    let mut outgoing = graph.edges.iter().filter(|edge| edge.from == node_path);

    match sink {
        TaintSinkKind::ExternalNetwork => outgoing.any(|edge| {
            matches!(edge.relation, ArtifactRelation::ConnectsTo) && looks_like_external_sink(edge)
        }),
        TaintSinkKind::Execution => {
            let declares_execution =
                node_has_capability(graph, node_path, ArtifactCapability::ProcessExecution)
                    || node_has_capability(graph, node_path, ArtifactCapability::InstallExecution);
            declares_execution
                || outgoing.any(|edge| matches!(edge.relation, ArtifactRelation::Executes))
        }
        TaintSinkKind::Persistence => {
            let declares_persistence =
                node_has_capability(graph, node_path, ArtifactCapability::PersistenceSurface);
            declares_persistence
                || outgoing.any(|edge| matches!(edge.relation, ArtifactRelation::Persists))
        }
    }
}
/// Tells whether an edge with this `relation` groups its endpoints into a
/// sibling cluster.
///
/// Only `References`, `Contains`, `Loads` and `Mounts` do; every other
/// relation returns `false`.
fn relation_forms_sibling_cluster(relation: ArtifactRelation) -> bool {
    match relation {
        ArtifactRelation::References
        | ArtifactRelation::Contains
        | ArtifactRelation::Loads
        | ArtifactRelation::Mounts => true,
        _ => false,
    }
}
/// Groups nodes into sibling clusters based on the graph's cluster-forming
/// edges.
///
/// Each source node of an edge accepted by `relation_forms_sibling_cluster`
/// seeds a cluster containing itself and all of its targets over such edges.
/// The per-parent clusters are then passed to `merge_overlapping_clusters`,
/// so clusters sharing a node are combined.
pub(super) fn build_sibling_clusters(graph: &ArtifactGraph) -> Vec<BTreeSet<String>> {
    let mut clusters_by_parent: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();

    let structural_edges = graph
        .edges
        .iter()
        .filter(|edge| relation_forms_sibling_cluster(edge.relation));

    for edge in structural_edges {
        let members = clusters_by_parent.entry(edge.from.clone()).or_default();
        members.insert(edge.from.clone());
        members.insert(edge.to.clone());
    }

    merge_overlapping_clusters(clusters_by_parent.into_values().collect())
}
/// Merges clusters that share at least one node, transitively, using a
/// union-find over node names.
///
/// Nodes are numbered in first-seen order (clusters in input order, members
/// in set order). The merged clusters are returned in ascending order of the
/// index of their union-find root. Empty input yields an empty vector.
fn merge_overlapping_clusters(clusters: Vec<BTreeSet<String>>) -> Vec<BTreeSet<String>> {
    /// Returns the representative of `start`'s set, pointing each visited
    /// entry at its grandparent on the way up.
    fn find(parent: &mut [usize], start: usize) -> usize {
        let mut current = start;
        loop {
            let up = parent[current];
            if up == current {
                return current;
            }
            let grandparent = parent[up];
            parent[current] = grandparent;
            current = grandparent;
        }
    }

    /// Joins the sets containing `a` and `b`; the root of `a` is attached
    /// under the root of `b`.
    fn union(parent: &mut [usize], a: usize, b: usize) {
        let root_a = find(parent, a);
        let root_b = find(parent, b);
        if root_a != root_b {
            parent[root_a] = root_b;
        }
    }

    if clusters.is_empty() {
        return Vec::new();
    }

    // Give every distinct node a dense index, in first-seen order.
    let mut node_index: BTreeMap<String, usize> = BTreeMap::new();
    for node in clusters.iter().flatten() {
        let next_index = node_index.len();
        node_index.entry(node.clone()).or_insert(next_index);
    }

    // Initially every node is its own root.
    let mut parent: Vec<usize> = (0..node_index.len()).collect();

    // Tie every member of a cluster to that cluster's first member.
    for cluster in &clusters {
        let mut members = cluster.iter().map(|node| node_index[node]);
        if let Some(anchor) = members.next() {
            for member in members {
                union(&mut parent, anchor, member);
            }
        }
    }

    // Bucket the nodes by their final root.
    let mut merged: BTreeMap<usize, BTreeSet<String>> = BTreeMap::new();
    for (node, index) in node_index {
        let root = find(&mut parent, index);
        merged.entry(root).or_default().insert(node);
    }
    merged.into_values().collect()
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::artifact_graph::{ArtifactEdge, ArtifactNode, ArtifactRelation};
    use crate::findings::ArtifactKind;

    /// Builds a generic node at `path` with no capabilities.
    fn node(path: &str) -> ArtifactNode {
        ArtifactNode {
            path: String::from(path),
            kind: ArtifactKind::GenericArtifact,
            capabilities: Vec::new(),
        }
    }

    /// Builds an edge `from -> to` with the given relation and no endpoint kind.
    fn edge(from: &str, to: &str, relation: ArtifactRelation) -> ArtifactEdge {
        ArtifactEdge {
            from: String::from(from),
            to: String::from(to),
            relation,
            endpoint_kind: None,
        }
    }

    /// Builds an owned string set from string literals.
    fn string_set(members: &[&str]) -> BTreeSet<String> {
        members.iter().map(|member| member.to_string()).collect()
    }

    /// True when some cluster contains both `left` and `right`.
    fn share_cluster(clusters: &[BTreeSet<String>], left: &str, right: &str) -> bool {
        clusters
            .iter()
            .any(|c| c.contains(left) && c.contains(right))
    }

    #[test]
    fn build_sibling_clusters_includes_loads_and_mounts() {
        let nodes = vec![node("skill.md"), node("plugin.wasm"), node("vol")];
        let edges = vec![
            edge("skill.md", "plugin.wasm", ArtifactRelation::Loads),
            edge("skill.md", "vol", ArtifactRelation::Mounts),
        ];
        let graph = ArtifactGraph { nodes, edges };

        let clusters = build_sibling_clusters(&graph);

        assert!(
            share_cluster(&clusters, "skill.md", "plugin.wasm"),
            "Loads edge must form a cluster; got {clusters:?}"
        );
        assert!(
            share_cluster(&clusters, "skill.md", "vol"),
            "Mounts edge must form a cluster; got {clusters:?}"
        );
    }

    #[test]
    fn build_sibling_clusters_excludes_non_structural_edges() {
        let nodes = vec![node("a"), node("b")];
        let edges = vec![
            edge("a", "b", ArtifactRelation::ConnectsTo),
            edge("a", "b", ArtifactRelation::Reads),
            edge("a", "b", ArtifactRelation::Writes),
        ];
        let graph = ArtifactGraph { nodes, edges };

        let clusters = build_sibling_clusters(&graph);

        assert!(
            clusters.is_empty(),
            "non-structural edges must NOT form clusters; got {clusters:?}"
        );
    }

    #[test]
    fn merge_overlapping_clusters_merges_transitively() {
        // The third set bridges the first two through "b" and "c".
        let input = vec![
            string_set(&["a", "b"]),
            string_set(&["c", "d"]),
            string_set(&["b", "c"]),
        ];

        let merged = merge_overlapping_clusters(input);

        assert_eq!(
            merged.len(),
            1,
            "transitively connected clusters must merge into one; got {merged:?}"
        );
        assert!(
            merged[0].contains("a") && merged[0].contains("d"),
            "all transitively connected nodes must be in the merged cluster; got {:?}",
            merged[0]
        );
    }

    #[test]
    fn merge_overlapping_clusters_preserves_disjoint() {
        let input = vec![string_set(&["a", "b"]), string_set(&["c", "d"])];

        let merged = merge_overlapping_clusters(input);

        assert_eq!(
            merged.len(),
            2,
            "disjoint clusters must not merge; got {merged:?}"
        );
    }
}