// skill-veil-core 0.2.0
//
// Core library for skill-veil behavioral analysis.
// (Source listing; the line-number gutter of the rendered documentation page was removed.)

//! Graph construction utilities for artifact dependency analysis.
//!
//! Provides [`build_artifact_graph`] which assembles an [`ArtifactGraph`] from
//! discovered files, classifies artifact kinds, infers cross-artifact
//! relationships, and identifies sibling package manifests and lockfiles.

use crate::artifact_graph::{ArtifactCapabilityFact, ArtifactGraph, ArtifactRelation};
use crate::findings::ArtifactKind;
use crate::ports::FileSystemProvider;
use crate::services::{ArtifactOrchestratorService, FileDiscoveryService};
use crate::SkillDocument;
use std::path::{Path, PathBuf};

/// Assemble the [`ArtifactGraph`] rooted at the document `doc`.
///
/// Nodes are inserted in three passes, in this order:
///
/// 1. the document itself;
/// 2. every recognised manifest found next to the document (linked from the
///    root with [`ArtifactRelation::Contains`]), each immediately followed by
///    the lockfiles it is expected to have and that exist on disk (linked
///    from the manifest with [`ArtifactRelation::Locks`]);
/// 3. every entry of `doc.referenced_files` (linked from the root with
///    [`ArtifactRelation::References`]).
///
/// After each node is inserted, `add_inferred_relations` is run on that file
/// so that relations derived from its content are recorded as well. Node ids
/// are the `display()` rendering of the corresponding path.
pub(crate) fn build_artifact_graph<F: FileSystemProvider>(
    artifact_orchestration: &ArtifactOrchestratorService,
    fs_provider: &F,
    doc: &SkillDocument,
) -> ArtifactGraph {
    let root_id = doc.path.display().to_string();
    let mut graph = ArtifactGraph::new();

    // Pass 1: the root document.
    let root_kind = artifact_kind_for_path::<F>(&doc.path);
    let root_caps = artifact_capabilities(artifact_orchestration, fs_provider, &doc.path);
    graph.add_node_with_capabilities(root_id.clone(), root_kind, root_caps);
    add_inferred_relations(
        &mut graph,
        artifact_orchestration,
        fs_provider,
        &doc.path,
        &root_id,
    );

    // Pass 2: sibling manifests and the lockfiles that belong to them.
    if let Some(parent_dir) = doc.path.parent() {
        // The document may itself be a manifest; it is already the root node.
        let manifests = sibling_package_manifests(fs_provider, parent_dir)
            .into_iter()
            .filter(|candidate| *candidate != doc.path);

        for manifest in manifests {
            let manifest_id = manifest.display().to_string();
            let manifest_kind = artifact_kind_for_path::<F>(&manifest);
            let manifest_caps = artifact_capabilities(artifact_orchestration, fs_provider, &manifest);
            graph.add_node_with_capabilities(manifest_id.clone(), manifest_kind, manifest_caps);
            graph.add_edge(
                root_id.clone(),
                manifest_id.clone(),
                ArtifactRelation::Contains,
            );
            add_inferred_relations(
                &mut graph,
                artifact_orchestration,
                fs_provider,
                &manifest,
                &manifest_id,
            );

            let lockfiles = sibling_expected_lockfiles_for_manifest(
                artifact_orchestration,
                fs_provider,
                &manifest,
                parent_dir,
            );
            for lockfile in lockfiles {
                let lockfile_id = lockfile.display().to_string();
                let lockfile_caps =
                    artifact_capabilities(artifact_orchestration, fs_provider, &lockfile);
                graph.add_node_with_capabilities(
                    lockfile_id.clone(),
                    ArtifactKind::Lockfile,
                    lockfile_caps,
                );
                graph.add_edge(
                    manifest_id.clone(),
                    lockfile_id.clone(),
                    ArtifactRelation::Locks,
                );
                add_inferred_relations(
                    &mut graph,
                    artifact_orchestration,
                    fs_provider,
                    &lockfile,
                    &lockfile_id,
                );
            }
        }
    }

    // Pass 3: files the document refers to explicitly.
    for referenced_file in &doc.referenced_files {
        let referenced_id = referenced_file.display().to_string();
        let referenced_kind = artifact_kind_for_path::<F>(referenced_file);
        let referenced_caps =
            artifact_capabilities(artifact_orchestration, fs_provider, referenced_file);
        graph.add_node_with_capabilities(referenced_id.clone(), referenced_kind, referenced_caps);
        graph.add_edge(
            root_id.clone(),
            referenced_id.clone(),
            ArtifactRelation::References,
        );
        add_inferred_relations(
            &mut graph,
            artifact_orchestration,
            fs_provider,
            referenced_file,
            &referenced_id,
        );
    }

    graph
}

/// Classify `path` into an [`ArtifactKind`] using only the path itself.
///
/// Rules are tried in this order and the first hit wins:
///
/// 1. well-known file names, compared after ASCII lower-casing (MCP
///    manifests, lockfiles, package manifests, agent instruction files);
/// 2. a file name ending in `.prompt.md` (also after lower-casing);
/// 3. a parent directory named `prompts` (ASCII case-insensitive);
/// 4. `FileDiscoveryService::is_explicit_skill_file`;
/// 5. otherwise [`ArtifactKind::ReferencedArtifact`].
///
/// A path without a UTF-8 file name skips rules 1 and 2.
pub fn artifact_kind_for_path<F: FileSystemProvider>(path: &Path) -> ArtifactKind {
    let lowered_name = path
        .file_name()
        .and_then(|raw| raw.to_str())
        .map(|raw| raw.to_ascii_lowercase());

    // Name-based rules: return as soon as one of them applies.
    if let Some(name) = lowered_name.as_deref() {
        match name {
            "mcp.json" | "mcp.yaml" | "mcp.yml" => return ArtifactKind::McpServerManifest,
            "cargo.lock"
            | "poetry.lock"
            | "uv.lock"
            | "pipfile.lock"
            | "yarn.lock"
            | "pnpm-lock.yaml"
            | "npm-shrinkwrap.json"
            | "package-lock.json" => return ArtifactKind::Lockfile,
            "package.json"
            | "requirements.txt"
            | "pyproject.toml"
            | "cargo.toml"
            | "dockerfile"
            | "docker-compose.yml"
            | "docker-compose.yaml"
            | "makefile"
            | "gnumakefile"
            | ".npmrc"
            | "pip.conf" => return ArtifactKind::PackageManifest,
            "agents.md" | "claude.md" | "system.md" | "persona.md" | "soul.md" => {
                return ArtifactKind::AgentInstruction;
            }
            _ if name.ends_with(".prompt.md") => return ArtifactKind::PromptPackDocument,
            _ => {}
        }
    }

    // Location-based rule: anything directly inside a `prompts` directory.
    let inside_prompts_dir = path
        .parent()
        .and_then(Path::file_name)
        .and_then(|dir| dir.to_str())
        .is_some_and(|dir| dir.eq_ignore_ascii_case("prompts"));
    if inside_prompts_dir {
        return ArtifactKind::PromptPackDocument;
    }

    if FileDiscoveryService::<F>::is_explicit_skill_file(path) {
        ArtifactKind::SkillDocument
    } else {
        ArtifactKind::ReferencedArtifact
    }
}

/// Return the files next to `path` that are worth inspecting alongside it.
///
/// The parent directory of `path` is listed through `fs_provider` with the
/// pattern `*` (the `false` flag is presumably "non-recursive" — confirm
/// against `FileSystemProvider::list_files`). An entry is kept when its file
/// name is one of the known manifest / lockfile / build-file names or its
/// extension is one of the known script / source extensions; both
/// comparisons ignore ASCII case.
///
/// Returns an empty vector when `path` has no parent. A listing error is
/// logged as a warning and also yields an empty vector.
pub(crate) fn sibling_files<F: FileSystemProvider>(fs_provider: &F, path: &Path) -> Vec<PathBuf> {
    /// File names (lowercase) that mark manifests, lockfiles and build files.
    const RELEVANT_NAMES: &[&str] = &[
        "package.json",
        "package-lock.json",
        "npm-shrinkwrap.json",
        "requirements.txt",
        "pyproject.toml",
        "cargo.toml",
        "cargo.lock",
        "poetry.lock",
        "uv.lock",
        "pipfile.lock",
        "dockerfile",
        "docker-compose.yml",
        "docker-compose.yaml",
        "makefile",
        "gnumakefile",
        ".npmrc",
        "pip.conf",
        "mcp.json",
        "mcp.yaml",
        "mcp.yml",
        "yarn.lock",
        "pnpm-lock.yaml",
    ];
    /// Extensions (lowercase, without the dot) of script and source files.
    const RELEVANT_EXTENSIONS: &[&str] = &[
        "sh", "bash", "zsh", "ksh", "fish", "ps1", "py", "js", "ts", "mjs", "cjs", "mts", "cts",
        "rb", "pl", "rs", "go", "php",
    ];

    /// True when `candidate` matches one of the tables above. Every table
    /// entry is lowercase ASCII, so an ASCII case-insensitive comparison is
    /// the same as lower-casing the candidate first.
    fn is_relevant(candidate: &Path) -> bool {
        let name_matches = candidate
            .file_name()
            .and_then(|raw| raw.to_str())
            .is_some_and(|name| {
                RELEVANT_NAMES
                    .iter()
                    .any(|known| name.eq_ignore_ascii_case(known))
            });
        if name_matches {
            return true;
        }
        candidate
            .extension()
            .and_then(|raw| raw.to_str())
            .is_some_and(|ext| {
                RELEVANT_EXTENSIONS
                    .iter()
                    .any(|known| ext.eq_ignore_ascii_case(known))
            })
    }

    let Some(parent) = path.parent() else {
        return Vec::new();
    };

    let listing = match fs_provider.list_files(parent, "*", false) {
        Ok(entries) => entries,
        Err(e) => {
            // Best effort: a failed listing only disables sibling detection.
            tracing::warn!(
                "I/O error listing files in {}: {e}; sibling detection skipped",
                parent.display()
            );
            Vec::new()
        }
    };

    listing
        .into_iter()
        .filter(|entry| is_relevant(entry))
        .collect()
}

/// Recover a SHA-256 package id from a component of `path`.
///
/// The path is walked from its last component towards the root (the final
/// component itself is examined first, then each enclosing directory). The
/// first component that is exactly [`SHA256_HEX_LEN`] characters long and
/// made up solely of `0-9` / `a-f` is returned as an owned string. `None` is
/// returned when no component qualifies; components that are not valid UTF-8
/// are skipped.
///
/// # Identity contract
///
/// SHA-256 hex digests in this codebase are always **lowercase**. Accepting
/// uppercase (as `is_ascii_hexdigit()` would) lets a segment such as
/// `AAAA...` (64 uppercase chars) pass as a package id, so two copies of one
/// package whose paths differ only in casing would get distinct ids and slip
/// past case-sensitive baseline / waiver matching. The check is therefore
/// deliberately stricter than `is_ascii_hexdigit()` to keep ids canonical.
///
/// Kept in sync with `is_sha256_hex` in `crates/skill-veil-cli/src/vt/cross_check.rs`.
pub fn derive_package_id(path: &Path) -> Option<String> {
    for ancestor in path.ancestors() {
        let Some(segment) = ancestor.file_name().and_then(|raw| raw.to_str()) else {
            continue;
        };
        let looks_like_digest =
            segment.len() == SHA256_HEX_LEN && segment.bytes().all(is_lower_hex_byte);
        if looks_like_digest {
            return Some(segment.to_owned());
        }
    }
    None
}

/// Number of characters in a SHA-256 digest written as hex: 32 raw bytes,
/// two hex characters each. Gives `derive_package_id` a named length to
/// compare against instead of a bare literal.
const SHA256_HEX_LEN: usize = 64;

/// True for the ASCII bytes `0`-`9` and `a`-`f`; uppercase `A`-`F` is
/// rejected on purpose.
#[inline]
fn is_lower_hex_byte(b: u8) -> bool {
    b.is_ascii_digit() || (b'a'..=b'f').contains(&b)
}

/// Read `path` through `fs_provider` and infer its capability facts.
///
/// The bytes are decoded as UTF-8 lossily and handed to
/// `ArtifactOrchestratorService::infer_capabilities`. If the file cannot be
/// read, no capabilities are reported (empty vector).
fn artifact_capabilities<F: FileSystemProvider>(
    artifact_orchestration: &ArtifactOrchestratorService,
    fs_provider: &F,
    path: &Path,
) -> Vec<ArtifactCapabilityFact> {
    match fs_provider.read_file_bytes(path) {
        Err(_) => Vec::new(),
        Ok(raw) => {
            let lossy = raw.decode_utf8_lossy();
            if lossy.decode_warning {
                // The artifact itself already gets a `decode_warning_finding`
                // from the primary scanner pipeline. This log line is the
                // audit trail for the graph-side inference, which otherwise
                // would treat a probably-binary file as ordinary text and
                // quietly spread noisy capabilities.
                tracing::warn!(
                    path = %path.display(),
                    "graph capability inference using lossy UTF-8 decode (likely binary content)"
                );
            }
            artifact_orchestration.infer_capabilities(path, &lossy.text)
        }
    }
}

/// Add the relations inferred from the content of `path` to `graph`.
///
/// The file is read through `fs_provider`, decoded as UTF-8 lossily and
/// passed to `ArtifactOrchestratorService::infer_relations`. Every returned
/// link contributes a node for its target and an edge from `source_path`
/// (the graph id of `path`) to that target. Nothing is added when the file
/// cannot be read.
fn add_inferred_relations<F: FileSystemProvider>(
    graph: &mut ArtifactGraph,
    artifact_orchestration: &ArtifactOrchestratorService,
    fs_provider: &F,
    path: &Path,
    source_path: &str,
) {
    let lossy = if let Ok(raw) = fs_provider.read_file_bytes(path) {
        raw.decode_utf8_lossy()
    } else {
        return;
    };
    if lossy.decode_warning {
        tracing::warn!(
            path = %path.display(),
            "graph relation inference using lossy UTF-8 decode (likely binary content)"
        );
    }

    let links = artifact_orchestration.infer_relations(path, &lossy.text);
    for link in links {
        // The target is classified from its path *before* it is inserted.
        // Earlier, every inferred target went in as
        // `ArtifactKind::GenericArtifact`, so e.g. an `mcp.json` reached only
        // through a relation (never listed as a sibling manifest) was not
        // recorded as `McpServerManifest`, and taint / blast-radius rules
        // keyed on `ArtifactKind` silently did not fire. The
        // specificity-promotion rule of `add_node_with_capabilities` can
        // still upgrade a node on a later, better-typed insertion, but a
        // target that is only ever inferred gets no second insertion, so
        // this is its one chance to be typed correctly.
        let kind = artifact_kind_for_path::<F>(Path::new(&link.target));
        graph.add_node(link.target.clone(), kind);
        graph.add_edge(source_path.to_owned(), link.target, link.relation);
    }
}

/// List the package / MCP manifests found in the directory `path`.
///
/// `path` is the directory to list (the caller in this file passes the
/// document's parent directory). It is listed through `fs_provider` with
/// the pattern `*`, and an entry is kept when its file name equals one of
/// the known manifest names, ignoring ASCII case. A listing error is logged
/// as a warning and yields an empty vector.
fn sibling_package_manifests<F: FileSystemProvider>(fs_provider: &F, path: &Path) -> Vec<PathBuf> {
    /// Recognised manifest file names, all lowercase.
    const MANIFEST_NAMES: &[&str] = &[
        "package.json",
        "mcp.json",
        "mcp.yaml",
        "mcp.yml",
        "requirements.txt",
        "pyproject.toml",
        "cargo.toml",
        "dockerfile",
        "docker-compose.yml",
        "docker-compose.yaml",
        "makefile",
        "gnumakefile",
        ".npmrc",
        "pip.conf",
    ];

    let listing = match fs_provider.list_files(path, "*", false) {
        Ok(entries) => entries,
        Err(e) => {
            // Best effort: a failed listing only disables manifest detection.
            tracing::warn!(
                "I/O error listing files in {}: {e}; manifest detection skipped",
                path.display()
            );
            Vec::new()
        }
    };

    listing
        .into_iter()
        .filter(|entry| {
            let Some(name) = entry.file_name().and_then(|raw| raw.to_str()) else {
                return false;
            };
            // Table entries are lowercase ASCII, so this equals comparing the
            // lower-cased name against the table.
            MANIFEST_NAMES
                .iter()
                .any(|known| name.eq_ignore_ascii_case(known))
        })
        .collect()
}

/// Return the lockfiles expected for `manifest` that exist in `parent_dir`.
///
/// The manifest is read through `fs_provider` and decoded as UTF-8 lossily;
/// `ArtifactOrchestratorService::expected_lockfiles` then names the
/// lockfiles that should accompany it. Each name is joined onto
/// `parent_dir` and kept only if `fs_provider.exists` reports it present.
/// An unreadable manifest yields an empty vector.
fn sibling_expected_lockfiles_for_manifest<F: FileSystemProvider>(
    artifact_orchestration: &ArtifactOrchestratorService,
    fs_provider: &F,
    manifest: &Path,
    parent_dir: &Path,
) -> Vec<PathBuf> {
    let content = match fs_provider.read_file_bytes(manifest) {
        Ok(raw) => raw.decode_utf8_lossy().text,
        Err(_) => return Vec::new(),
    };

    let mut present = Vec::new();
    for name in artifact_orchestration.expected_lockfiles(manifest, &content) {
        let candidate = parent_dir.join(name);
        if fs_provider.exists(&candidate) {
            present.push(candidate);
        }
    }
    present
}

/// Tests pinning the identity contract of `derive_package_id`: only a path
/// component of exactly 64 lowercase hex characters is accepted as an id.
#[cfg(test)]
mod derive_package_id_tests {
    use super::derive_package_id;
    use std::path::PathBuf;

    #[test]
    fn accepts_lowercase_hex_64() {
        let sha = "a".repeat(64);
        let p = PathBuf::from(format!("/tmp/{sha}/SKILL.md"));
        assert_eq!(derive_package_id(&p), Some(sha));
    }

    /// Digits and letters from the whole lowercase hex alphabet qualify.
    #[test]
    fn accepts_full_lowercase_hex_alphabet() {
        let sha = "0123456789abcdef".repeat(4);
        let p = PathBuf::from(format!("/tmp/{sha}/SKILL.md"));
        assert_eq!(derive_package_id(&p), Some(sha));
    }

    /// The id does not have to be the immediate parent directory.
    #[test]
    fn finds_id_in_distant_ancestor() {
        let sha = "c".repeat(64);
        let p = PathBuf::from(format!("/tmp/{sha}/scripts/nested/run.sh"));
        assert_eq!(derive_package_id(&p), Some(sha));
    }

    /// When several components qualify, the one closest to the leaf wins.
    #[test]
    fn prefers_innermost_matching_segment() {
        let outer = "a".repeat(64);
        let inner = "b".repeat(64);
        let p = PathBuf::from(format!("/tmp/{outer}/{inner}/SKILL.md"));
        assert_eq!(derive_package_id(&p), Some(inner));
    }

    /// Contract: uppercase hex MUST be rejected. SHA-256 outputs from the
    /// project's hashing path are always lowercase; rejecting uppercase
    /// (the function returns `None`) keeps package_id values canonical and
    /// case-sensitive baseline matching consistent.
    #[test]
    fn rejects_uppercase_hex_64() {
        let upper = "A".repeat(64);
        let p = PathBuf::from(format!("/tmp/{upper}/SKILL.md"));
        assert!(derive_package_id(&p).is_none());
    }

    #[test]
    fn rejects_mixed_case_hex_64() {
        let mut s = String::with_capacity(64);
        for i in 0..64 {
            s.push(if i % 2 == 0 { 'a' } else { 'B' });
        }
        let p = PathBuf::from(format!("/tmp/{s}/SKILL.md"));
        assert!(derive_package_id(&p).is_none());
    }

    /// Lowercase letters outside `a-f` are not hex, whatever the length.
    #[test]
    fn rejects_non_hex_lowercase_64() {
        let not_hex = "g".repeat(64);
        let p = PathBuf::from(format!("/tmp/{not_hex}/SKILL.md"));
        assert!(derive_package_id(&p).is_none());
    }

    #[test]
    fn rejects_short_hex() {
        let p = PathBuf::from("/tmp/abcdef/SKILL.md");
        assert!(derive_package_id(&p).is_none());
    }

    /// One character too few or too many must not be mistaken for a digest.
    #[test]
    fn rejects_off_by_one_lengths() {
        for len in [63usize, 65] {
            let segment = "a".repeat(len);
            let p = PathBuf::from(format!("/tmp/{segment}/SKILL.md"));
            assert!(derive_package_id(&p).is_none(), "length {len} accepted");
        }
    }
}