Skip to main content

skill_veil_core/
artifact_graph.rs

1//! Artifact graph for representing relationships between scanned assets.
2
3use crate::findings::ArtifactKind;
4use serde::{Deserialize, Serialize};
5
6/// Capability exposed or requested by an artifact.
7#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
8#[serde(rename_all = "snake_case")]
9pub enum ArtifactCapability {
10    BrowserAccess,
11    NetworkAccess,
12    InstallExecution,
13    ExposesBinary,
14    PrivilegedRuntime,
15    HostFilesystemAccess,
16    ProcessExecution,
17    SecretAccess,
18    PersistenceSurface,
19    FilesystemWrite,
20    IdentityAccess,
21    InboundNetworkSurface,
22}
23
24/// Origin of a capability assessment.
25#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
26#[serde(rename_all = "snake_case")]
27pub enum ArtifactCapabilitySource {
28    Declared,
29    Observed,
30}
31
32/// A capability attached to an artifact, including how it was derived.
33#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
34pub struct ArtifactCapabilityFact {
35    pub capability: ArtifactCapability,
36    pub source: ArtifactCapabilitySource,
37}
38
39/// A node in the scanned artifact graph.
40#[derive(Debug, Clone, Serialize, Deserialize)]
41pub struct ArtifactNode {
42    pub path: String,
43    pub kind: ArtifactKind,
44    #[serde(default, skip_serializing_if = "Vec::is_empty")]
45    pub capabilities: Vec<ArtifactCapabilityFact>,
46}
47
48/// Describes the network endpoint category for a ConnectsTo or Downloads edge.
49#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
50#[serde(rename_all = "snake_case")]
51pub enum EndpointKind {
52    /// Publicly addressable remote endpoint (attacker-controlled or external service).
53    Remote,
54    /// Known package registry (npm, PyPI, crates.io, …). Downloads from these are lower risk.
55    Registry,
56    /// Ephemeral or tunneled endpoint (ngrok, trycloudflare, …).
57    Transient,
58    /// Cloud provider metadata/control-plane endpoint (169.254.169.254, …).
59    ControlPlane,
60    /// Loopback or LAN-local endpoint.
61    Local,
62}
63
64/// A directed edge between two artifacts.
65#[derive(Debug, Clone, Serialize, Deserialize)]
66pub struct ArtifactEdge {
67    pub from: String,
68    pub to: String,
69    pub relation: ArtifactRelation,
70    #[serde(default, skip_serializing_if = "Option::is_none")]
71    pub endpoint_kind: Option<EndpointKind>,
72}
73
74/// Relationship between two artifacts.
75#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
76#[serde(rename_all = "snake_case")]
77pub enum ArtifactRelation {
78    References,
79    Contains,
80    Locks,
81    Downloads,
82    Executes,
83    Loads,
84    Persists,
85    Mounts,
86    ConnectsTo,
87    Reads,
88    Writes,
89    AccessesSecrets,
90}
91
92/// Lightweight graph describing scanned artifacts and their relationships.
93#[derive(Debug, Clone, Default, Serialize, Deserialize)]
94pub struct ArtifactGraph {
95    pub nodes: Vec<ArtifactNode>,
96    pub edges: Vec<ArtifactEdge>,
97}
98
99impl ArtifactGraph {
100    #[must_use]
101    pub fn new() -> Self {
102        Self::default()
103    }
104
105    pub fn add_node(&mut self, path: impl Into<String>, kind: ArtifactKind) {
106        let path = path.into();
107        self.add_node_with_capabilities(path, kind, Vec::new());
108    }
109
110    pub fn add_node_with_capabilities(
111        &mut self,
112        path: impl Into<String>,
113        kind: ArtifactKind,
114        capabilities: Vec<ArtifactCapabilityFact>,
115    ) {
116        let path = path.into();
117        if let Some(existing) = self.nodes.iter_mut().find(|node| node.path == path) {
118            // Promote `kind` to the more specific classification. Pipeline
119            // ordering is not deterministic, so a "first wins" rule would
120            // silently lose more-specific kinds discovered later (e.g.
121            // `McpServerManifest` arriving after a generic `AgentInstruction`
122            // pre-classification). See `ArtifactKind::specificity` for the
123            // tier ordering and the tests in this module that pin the
124            // contract.
125            if kind.specificity() > existing.kind.specificity() {
126                existing.kind = kind;
127            }
128            for capability in capabilities {
129                if !existing.capabilities.iter().any(|fact| {
130                    fact.capability == capability.capability && fact.source == capability.source
131                }) {
132                    existing.capabilities.push(capability);
133                }
134            }
135            return;
136        }
137
138        self.nodes.push(ArtifactNode {
139            path,
140            kind,
141            capabilities,
142        });
143    }
144
145    pub fn add_edge(
146        &mut self,
147        from: impl Into<String>,
148        to: impl Into<String>,
149        relation: ArtifactRelation,
150    ) {
151        self.add_edge_with_endpoint(from, to, relation, None);
152    }
153
154    pub fn add_edge_with_endpoint(
155        &mut self,
156        from: impl Into<String>,
157        to: impl Into<String>,
158        relation: ArtifactRelation,
159        endpoint_kind: Option<EndpointKind>,
160    ) {
161        let edge = ArtifactEdge {
162            from: from.into(),
163            to: to.into(),
164            relation,
165            endpoint_kind,
166        };
167
168        // Edge identity is `(from, to, relation)` — `endpoint_kind` is an
169        // *annotation* on the edge, not a discriminator. Pre-fix the dedup
170        // also required `existing.endpoint_kind == edge.endpoint_kind`,
171        // so two calls adding the same `(from, to, relation)` triple with
172        // different annotations (e.g. `Some(Remote)` vs `Some(Registry)`)
173        // both survived and produced two distinct edges. The taint engine
174        // and capability scoring then double-counted the same logical edge.
175        // The fix dedupes on the triple and *upgrades* the existing
176        // annotation to the more-adversarial value when a duplicate fires.
177        if let Some(existing) = self.edges.iter_mut().find(|existing| {
178            existing.from == edge.from
179                && existing.to == edge.to
180                && std::mem::discriminant(&existing.relation)
181                    == std::mem::discriminant(&edge.relation)
182        }) {
183            existing.endpoint_kind =
184                upgrade_endpoint_kind(existing.endpoint_kind, edge.endpoint_kind);
185            return;
186        }
187
188        self.edges.push(edge);
189    }
190}
191
192/// Pick the higher-priority annotation when two edges with the same
193/// `(from, to, relation)` triple converge. Used by
194/// [`ArtifactGraph::add_edge_with_endpoint`] to keep edge identity stable
195/// while preserving the most-adversarial annotation. Priority (highest
196/// first):
197///
198/// 1. [`EndpointKind::ControlPlane`] — cloud metadata (IMDS), highest signal.
199/// 2. [`EndpointKind::Transient`] — ngrok / trycloudflare tunnels.
200/// 3. [`EndpointKind::Remote`] — public attacker-controlled.
201/// 4. [`EndpointKind::Local`] — loopback / LAN.
202/// 5. [`EndpointKind::Registry`] — known package registry, lowest concern.
203/// 6. `None` — unknown, lowest priority.
204fn upgrade_endpoint_kind(
205    existing: Option<EndpointKind>,
206    incoming: Option<EndpointKind>,
207) -> Option<EndpointKind> {
208    fn rank(kind: Option<EndpointKind>) -> u8 {
209        match kind {
210            Some(EndpointKind::ControlPlane) => 5,
211            Some(EndpointKind::Transient) => 4,
212            Some(EndpointKind::Remote) => 3,
213            Some(EndpointKind::Local) => 2,
214            Some(EndpointKind::Registry) => 1,
215            None => 0,
216        }
217    }
218    if rank(incoming) > rank(existing) {
219        incoming
220    } else {
221        existing
222    }
223}
224
#[cfg(test)]
mod tests {
    use super::*;

    /// Looks up the node recorded under `path`, if there is one.
    fn node_at<'g>(graph: &'g ArtifactGraph, path: &str) -> Option<&'g ArtifactNode> {
        graph.nodes.iter().find(|node| node.path == path)
    }

    /// Adds a `Downloads` edge from `"a"` to `"b"` carrying `annotation`.
    fn download_a_to_b(graph: &mut ArtifactGraph, annotation: Option<EndpointKind>) {
        graph.add_edge_with_endpoint("a", "b", ArtifactRelation::Downloads, annotation);
    }

    /// Contract: inserting an already-known path again with a more specific
    /// `ArtifactKind` upgrades the stored kind. Otherwise non-deterministic
    /// pipeline ordering would let an earlier `AgentInstruction`
    /// (specificity 3) hide a later `McpServerManifest` (specificity 4).
    #[test]
    fn add_node_promotes_to_more_specific_kind() {
        let mut graph = ArtifactGraph::new();
        graph.add_node("/pkg/manifest", ArtifactKind::AgentInstruction);
        graph.add_node("/pkg/manifest", ArtifactKind::McpServerManifest);

        let node = node_at(&graph, "/pkg/manifest").expect("node must exist");
        assert_eq!(
            node.kind,
            ArtifactKind::McpServerManifest,
            "More specific kind MUST replace less specific one"
        );
    }

    /// The opposite direction: a later, less specific insertion never demotes.
    #[test]
    fn add_node_does_not_demote_kind() {
        let mut graph = ArtifactGraph::new();
        graph.add_node("/pkg/manifest", ArtifactKind::McpServerManifest);
        graph.add_node("/pkg/manifest", ArtifactKind::GenericArtifact);

        let node = node_at(&graph, "/pkg/manifest").unwrap();
        assert_eq!(
            node.kind,
            ArtifactKind::McpServerManifest,
            "Less specific kind MUST NOT demote a more specific one"
        );
    }

    /// Idempotence: adding the same path and kind twice leaves one node.
    #[test]
    fn add_node_is_idempotent_for_same_kind() {
        let mut graph = ArtifactGraph::new();
        for _ in 0..2 {
            graph.add_node("/pkg/x", ArtifactKind::PackageManifest);
        }

        let matching = graph
            .nodes
            .iter()
            .filter(|node| node.path == "/pkg/x")
            .count();
        assert_eq!(
            matching, 1,
            "Re-inserting the same path must NOT duplicate the node"
        );
    }

    /// Within one specificity tier the first insertion is kept; only
    /// cross-tier upgrades replace the kind.
    #[test]
    fn add_node_keeps_first_within_same_specificity_tier() {
        let mut graph = ArtifactGraph::new();
        graph.add_node("/pkg/x", ArtifactKind::PackageManifest); // tier 4
        graph.add_node("/pkg/x", ArtifactKind::McpServerManifest); // tier 4

        let node = node_at(&graph, "/pkg/x").unwrap();
        assert_eq!(node.kind, ArtifactKind::PackageManifest);
    }

    /// # Contract
    ///
    /// Two `add_edge_with_endpoint` calls that share a
    /// `(from, to, relation)` triple but carry different `endpoint_kind`
    /// annotations MUST end up as one edge whose annotation is the
    /// more-adversarial of the two. When `endpoint_kind` was part of the
    /// dedup key, one detector annotating a download `Some(Remote)` and
    /// another annotating it `Some(Registry)` produced two edges for the
    /// same logical download, inflating taint-engine path counts and
    /// capability scoring.
    #[test]
    fn add_edge_dedupes_on_triple_and_upgrades_endpoint_annotation() {
        let mut graph = ArtifactGraph::new();
        download_a_to_b(&mut graph, Some(EndpointKind::Registry));
        download_a_to_b(&mut graph, Some(EndpointKind::Remote));

        assert_eq!(
            graph.edges.len(),
            1,
            "duplicate (from,to,relation) MUST NOT produce two edges; got {:?}",
            graph.edges
        );
        assert_eq!(
            graph.edges[0].endpoint_kind,
            Some(EndpointKind::Remote),
            "annotation must upgrade to the more-adversarial value (Remote > Registry)"
        );
    }

    /// # Contract (priority order)
    ///
    /// The highest-priority annotation survives whatever the insertion
    /// order. ControlPlane (IMDS) ranks first, followed by Transient
    /// (ngrok), Remote, Local and Registry; `None` ranks last.
    #[test]
    fn add_edge_endpoint_priority_order_preserves_highest() {
        let mut graph = ArtifactGraph::new();
        // Lowest priority first: the final annotation must still be
        // ControlPlane.
        let lowest_to_highest = [
            None,
            Some(EndpointKind::Registry),
            Some(EndpointKind::Local),
            Some(EndpointKind::Remote),
            Some(EndpointKind::Transient),
            Some(EndpointKind::ControlPlane),
        ];
        for annotation in lowest_to_highest {
            download_a_to_b(&mut graph, annotation);
        }

        assert_eq!(graph.edges.len(), 1);
        assert_eq!(
            graph.edges[0].endpoint_kind,
            Some(EndpointKind::ControlPlane),
            "ControlPlane (IMDS) MUST be the surviving annotation"
        );
    }

    /// # Contract (negative)
    ///
    /// Edges whose `relation` differs MUST remain separate even when
    /// `(from, to)` is the same: a `Downloads` edge and a `Reads` edge
    /// between one pair of artifacts carry different semantics, and the
    /// taint engine tells them apart.
    #[test]
    fn add_edge_keeps_different_relations_distinct() {
        let mut graph = ArtifactGraph::new();
        for relation in [ArtifactRelation::Downloads, ArtifactRelation::Reads] {
            graph.add_edge("a", "b", relation);
        }
        assert_eq!(graph.edges.len(), 2);
    }
}