skill-veil-core 0.1.3

Core library for skill-veil behavioral analysis
Documentation
use super::summarization::{sink_summary, source_summary};
use super::utils::{
    artifact_kind_for_node, artifact_paths, build_sibling_clusters, node_has_sink, node_has_source,
};
use super::ArtifactTaintRuleGroup;
use crate::artifact_graph::ArtifactGraph;
use crate::findings::{EvidenceKind, Finding, MatchTarget};
use std::collections::BTreeSet;

/// Builds findings for single artifact nodes that satisfy both halves of a
/// taint rule group.
///
/// Every path yielded by `artifact_paths(graph)` is checked against every
/// entry of `groups`. A group applies to a node only when the node has the
/// group's source *and* the group's sink; for each applicable (node, group)
/// pair one [`Finding`] is produced per rule in the group.
///
/// Each finding carries the rule's own severity, confidence and action, is
/// tagged as `EvidenceKind::Behavior`, is attributed to the node's path, and
/// stores `family=… source=… sink=…` as its match value.
///
/// Returns the findings in node order, then group order, then rule order.
pub(super) fn derive_per_node_taint_findings(
    graph: &ArtifactGraph,
    groups: &[ArtifactTaintRuleGroup],
) -> Vec<Finding> {
    let paths = artifact_paths(graph);
    let mut collected = Vec::new();

    for path in &paths {
        // Only groups whose source and sink both occur in this node apply.
        // `&&` keeps the original short-circuit: the sink is not probed when
        // the source is absent.
        let applicable = groups.iter().filter(|candidate| {
            node_has_source(graph, path, candidate.source)
                && node_has_sink(graph, path, candidate.sink)
        });

        for group in applicable {
            // These three values are shared by every rule of the group.
            let source_desc = source_summary(graph, path, group.source);
            let sink_desc = sink_summary(graph, path, group.sink);
            let artifact_kind = artifact_kind_for_node(graph, path);

            collected.extend(group.rules.iter().map(|rule| {
                let match_value = format!(
                    "family={} source={} sink={}",
                    rule.family, source_desc, sink_desc
                );
                Finding::builder(rule.id.clone(), rule.category)
                    .severity(rule.severity)
                    .confidence(rule.confidence)
                    .action(rule.action)
                    .evidence_kind(EvidenceKind::Behavior)
                    .artifact(artifact_kind, Some(path.clone()))
                    .matched_on(MatchTarget::ReferencedFile { path: path.clone() })
                    .match_value(match_value)
                    .reason(rule.reason.clone())
                    .build()
            }));
        }
    }

    collected
}

/// Builds findings for taint flows that span two *different* nodes of the
/// same sibling cluster.
///
/// For every cluster returned by `build_sibling_clusters(graph)` with at
/// least two members, and for every group in `groups`, each (source node,
/// sink node) pair with `source node != sink node` yields one [`Finding`] per
/// rule of the group. Pairs where both roles fall on the same node are
/// skipped here (they are left to the per-node pass).
///
/// Differences from a per-node finding, as visible in the builder calls:
/// * confidence is the rule's confidence multiplied by `0.9`;
/// * both the artifact path and `matched_on` point at the **source** node;
///   the sink only appears inside the `family=… source=… sink=…` match value.
///
/// # Budgets
///
/// * Each group may emit at most `max(50 / groups.len(), 1)` findings per
///   cluster, so every source/sink family gets a share of the cluster budget.
/// * At most 100 findings are emitted in total across all clusters.
///
/// # Panics
///
/// In builds with debug assertions enabled, panics when `groups.len()`
/// exceeds the per-cluster budget of 50.
pub(super) fn derive_cross_node_taint_findings(
    graph: &ArtifactGraph,
    groups: &[ArtifactTaintRuleGroup],
) -> Vec<Finding> {
    // Cap per-cluster findings to avoid quadratic explosion when a parent
    // references many children that each expose sources and sinks.
    const MAX_CROSS_NODE_FINDINGS_PER_CLUSTER: usize = 50;
    // Global cap across all clusters: without it, `per_group_budget * number
    // of clusters` could far exceed the per-cluster constant.
    const MAX_CROSS_NODE_FINDINGS_TOTAL: usize = 100;

    let sibling_clusters = build_sibling_clusters(graph);

    // Divide the cluster budget across groups so that a high-volume group
    // cannot exhaust it on its own.
    debug_assert!(
        groups.len() <= MAX_CROSS_NODE_FINDINGS_PER_CLUSTER,
        "Number of taint rule groups ({}) exceeds per-cluster budget ({}); each group will be capped to 1 finding",
        groups.len(),
        MAX_CROSS_NODE_FINDINGS_PER_CLUSTER
    );
    let per_group_budget = if groups.is_empty() {
        0
    } else {
        (MAX_CROSS_NODE_FINDINGS_PER_CLUSTER / groups.len()).max(1)
    };

    let mut findings = Vec::new();
    'clusters: for cluster in &sibling_clusters {
        if cluster.len() < 2 {
            continue;
        }
        for group in groups {
            // Once the global cap is reached nothing more can be emitted, so
            // stop scanning entirely instead of filtering further clusters.
            if findings.len() >= MAX_CROSS_NODE_FINDINGS_TOTAL {
                break 'clusters;
            }
            // A group without rules cannot produce findings; skip the
            // source/sink scans and the pair loop altogether.
            if group.rules.is_empty() {
                continue;
            }
            let source_nodes: Vec<&String> = cluster
                .iter()
                .filter(|path| node_has_source(graph, path, group.source))
                .collect();
            if source_nodes.is_empty() {
                continue;
            }
            let sink_nodes: Vec<&String> = cluster
                .iter()
                .filter(|path| node_has_sink(graph, path, group.sink))
                .collect();
            if sink_nodes.is_empty() {
                continue;
            }

            // Invariant on entry to the loops below: the global total is
            // below its cap (checked above) and this group's count (0) is
            // below `per_group_budget`, which is >= 1 whenever `groups` is
            // non-empty. Re-checking after every push therefore guarantees
            // no finding is ever pushed over either budget.
            let mut group_finding_count = 0_usize;
            'group: for source_node in &source_nodes {
                // Same-node pairs are already covered by the per-node pass.
                let mut partner_sinks = sink_nodes
                    .iter()
                    .filter(|sink_node| *sink_node != source_node)
                    .peekable();
                if partner_sinks.peek().is_none() {
                    continue;
                }

                // Source-side data depends only on the source node, so it is
                // computed once per source rather than once per pair.
                let src = source_summary(graph, source_node, group.source);
                let kind = artifact_kind_for_node(graph, source_node);

                for sink_node in partner_sinks {
                    let snk = sink_summary(graph, sink_node, group.sink);
                    for rule in &group.rules {
                        // The artifact path and `matched_on` BOTH point at
                        // the source node so that a finding references a
                        // single file. The original note here says
                        // suppression path-matching keys on the artifact
                        // path. The source/sink relationship is preserved
                        // in `match_value`.
                        findings.push(
                            Finding::builder(rule.id.clone(), rule.category)
                                .severity(rule.severity)
                                .confidence(rule.confidence * 0.9)
                                .action(rule.action)
                                .evidence_kind(EvidenceKind::Behavior)
                                .artifact(kind, Some((*source_node).clone()))
                                .matched_on(MatchTarget::ReferencedFile {
                                    path: (*source_node).clone(),
                                })
                                .match_value(format!(
                                    "family={} source={} sink={}",
                                    rule.family, src, snk
                                ))
                                .reason(rule.reason.clone())
                                .build(),
                        );
                        group_finding_count += 1;

                        // Stop this group as soon as either budget is spent.
                        if findings.len() >= MAX_CROSS_NODE_FINDINGS_TOTAL
                            || group_finding_count >= per_group_budget
                        {
                            break 'group;
                        }
                    }
                }
            }
        }
    }
    findings
}

// Anchor for the `BTreeSet` import: naming the type once in an anonymous
// constant keeps the import from being reported as unused. Per the original
// note, `build_sibling_clusters` returns `Vec<BTreeSet<String>>`, but that
// type is only ever inferred in this file and never written out.
const _: usize = std::mem::size_of::<BTreeSet<String>>();