skill-veil-core 0.2.0

Core library for skill-veil behavioral analysis
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
use crate::findings::{
    ArtifactScope, Finding, RecommendedAction, RootCauseGroup, SignalClass, ThreatCategory,
    VerdictReason,
};

/// Taint rule IDs that are eligible for the trusted-API-host downgrade
/// at the compound-verdict layer.
///
/// `is_trust_downgraded_taint` treats a finding as trust-downgraded
/// only when its `rule_id` is listed here AND its `match_value`
/// carries the `sinks_trusted=true` annotation. When EVERY actionable
/// `DataExfiltration` finding in the attributed scope qualifies,
/// `detect_credential_exfil_chain` emits `ReviewSignal` instead of
/// `MaliciousBehavior` — otherwise the per-finding downgrade would be
/// silently re-escalated by the compound layer.
///
/// Limited to the SECRET / IDENTITY taint rules that opt into the
/// downgrade in `artifact_taint::analysis::TRUSTED_HOST_DOWNGRADE_RULE_IDS`.
/// NOTE(review): that list lives outside this file — keep the two in sync.
const TRUSTED_HOST_DOWNGRADE_TAINT_RULES: &[&str] = &[
    "ARTIFACT_TAINT_SECRET_TO_EXTERNAL_NETWORK",
    "ARTIFACT_TAINT_IDENTITY_TO_EXTERNAL_NETWORK",
];

pub(super) fn detect_compound_verdict_reasons(
    findings: &[Finding],
    raw_root_cause_groups: &[RootCauseGroup],
) -> Vec<VerdictReason> {
    [
        detect_prompt_tampering_with_exec(findings, raw_root_cause_groups),
        detect_credential_exfil_chain(findings, raw_root_cause_groups),
        detect_install_hook_with_exec_surface(findings, raw_root_cause_groups),
        detect_broad_permissions_with_autonomy(findings, raw_root_cause_groups),
        detect_mcp_remote_endpoint_with_exec(findings, raw_root_cause_groups),
        detect_heartbeat_poll_with_credential_read(findings, raw_root_cause_groups),
    ]
    .into_iter()
    .flatten()
    .collect()
}

// Reports whether any pre-calibration root-cause group belongs to `category`
// with an action stronger than `Log`.
//
// The raw (pre-calibration) groups are consulted on purpose: calibrating an
// individual rule must not be able to silently switch off a compound verdict,
// because compound patterns describe architectural risk that is evaluated
// independently of calibration.
fn compound_has_category(
    raw_root_cause_groups: &[RootCauseGroup],
    category: ThreatCategory,
) -> bool {
    raw_root_cause_groups
        .iter()
        .filter(|candidate| candidate.category == category)
        .any(|candidate| candidate.strongest_action != RecommendedAction::Log)
}

/// Picks the attribution scope for `category`: the smallest
/// `ArtifactScope` (by its `Ord`) among the groups of that category
/// whose strongest action is not `Log`.
///
/// `ArtifactScope`'s derived `Ord` ranks `AgentEntrypoint <
/// PackageRootArtifact < SupportingArtifact`, so taking the minimum
/// prefers the entrypoint whenever it is present. That is the intent:
/// the entrypoint is the most user-visible and most actionable surface
/// for reviewers. Despite the function name, this is "most actionable",
/// not "most specific" — `AgentEntrypoint` is structurally the broadest
/// classification.
///
/// Returns `None` when no actionable group matches `category`.
fn most_specific_scope_for_category(
    raw_root_cause_groups: &[RootCauseGroup],
    category: ThreatCategory,
) -> Option<ArtifactScope> {
    raw_root_cause_groups
        .iter()
        .filter_map(|group| {
            let is_actionable_match =
                group.category == category && group.strongest_action != RecommendedAction::Log;
            is_actionable_match.then(|| group.scope)
        })
        .min()
}

// Reports whether some finding with `rule_id` has an action other than `Log`.
//
// This looks at the finding's own (pre-calibration) action: calibration only
// rewrites root_cause_groups and leaves individual findings alone. For rule
// ids that are subject to calibration use compound_has_category instead —
// debug builds assert that callers respect this.
fn compound_has_rule(findings: &[Finding], rule_id: &str) -> bool {
    debug_assert!(
        !crate::verdict_calibration::CALIBRATED_RULE_IDS.contains(&rule_id),
        "compound_has_rule checks pre-calibration actions; use compound_has_category for calibrated rule {rule_id}"
    );
    for finding in findings {
        if finding.rule_id == rule_id && finding.recommended_action != RecommendedAction::Log {
            return true;
        }
    }
    false
}

// Scope-restricted variant of compound_has_rule: the matching, non-`Log`
// finding must also sit in `scope`, which prevents a rule firing in one
// artifact scope from satisfying a compound pattern that belongs to another.
fn compound_has_rule_in_scope(findings: &[Finding], rule_id: &str, scope: ArtifactScope) -> bool {
    debug_assert!(
        !crate::verdict_calibration::CALIBRATED_RULE_IDS.contains(&rule_id),
        "compound_has_rule_in_scope checks pre-calibration actions; use compound_has_category for calibrated rule {rule_id}"
    );
    findings
        .iter()
        .filter(|finding| finding.rule_id == rule_id)
        .filter(|finding| finding.recommended_action != RecommendedAction::Log)
        .any(|finding| finding.artifact_scope == scope)
}

// Reports whether `rule_id` was raised by the agent entrypoint at all.
//
// Unlike the other helpers, the finding's action level is deliberately
// ignored: a declared permission counts toward a compound verdict simply by
// being present, because compound patterns capture architectural risk that
// cannot be waived one rule at a time.
fn compound_has_declared_permission_rule(findings: &[Finding], rule_id: &str) -> bool {
    findings
        .iter()
        .filter(|finding| finding.rule_id == rule_id)
        .any(|finding| finding.artifact_scope == ArtifactScope::AgentEntrypoint)
}

/// Reports whether the artifact shows high-risk autonomy semantics.
///
/// Two independent sources qualify:
/// 1. a pre-calibration `AutonomyEscalation` group in the
///    `AgentEntrypoint` scope whose strongest action is `Block` or whose
///    signal class is `MaliciousBehavior`;
/// 2. any non-`Log` finding for one of the approval-bypass /
///    prompt-override rules listed below.
///
/// The group check runs first; the rule checks are only consulted when it
/// does not match.
fn compound_has_high_risk_autonomy(
    findings: &[Finding],
    raw_root_cause_groups: &[RootCauseGroup],
) -> bool {
    const HIGH_RISK_AUTONOMY_RULES: [&str; 4] = [
        "OFFICIAL_APPROVAL_BYPASS_WITH_EXECUTION",
        "OFFICIAL_APPROVAL_BYPASS_DELETE_OR_MODIFY",
        "OFFICIAL_PROMPT_OVERRIDE_WITH_PERSISTENCE",
        "OFFICIAL_FORCED_APPROVAL_BYPASS",
    ];

    let entrypoint_group_is_high_risk = raw_root_cause_groups.iter().any(|group| {
        if group.category != ThreatCategory::AutonomyEscalation {
            return false;
        }
        if group.scope != ArtifactScope::AgentEntrypoint {
            return false;
        }
        group.strongest_action == RecommendedAction::Block
            || group.signal_class == SignalClass::MaliciousBehavior
    });
    if entrypoint_group_is_high_risk {
        return true;
    }

    HIGH_RISK_AUTONOMY_RULES
        .iter()
        .any(|rule_id| compound_has_rule(findings, rule_id))
}

/// Compound pattern: persistent prompt tampering together with remote
/// execution.
///
/// Fires when the pre-calibration groups contain an actionable
/// `PersistentPromptTampering` group and an actionable `RemoteExec` group.
/// The reason is always attributed to the `AgentEntrypoint` scope under the
/// `RemoteExec` category. `_findings` is unused; it is accepted so all
/// detectors share one signature.
fn detect_prompt_tampering_with_exec(
    _findings: &[Finding],
    raw_root_cause_groups: &[RootCauseGroup],
) -> Option<VerdictReason> {
    let tampers_with_prompt = compound_has_category(
        raw_root_cause_groups,
        ThreatCategory::PersistentPromptTampering,
    );
    if !tampers_with_prompt {
        return None;
    }
    if !compound_has_category(raw_root_cause_groups, ThreatCategory::RemoteExec) {
        return None;
    }

    Some(VerdictReason {
        scope: ArtifactScope::AgentEntrypoint,
        category: ThreatCategory::RemoteExec,
        signal_class: SignalClass::MaliciousBehavior,
        rationale: String::from(
            "Compound verdict: prompt override is paired with execution behavior",
        ),
    })
}

fn detect_credential_exfil_chain(
    findings: &[Finding],
    raw_root_cause_groups: &[RootCauseGroup],
) -> Option<VerdictReason> {
    let cred_scope = most_specific_scope_for_category(
        raw_root_cause_groups,
        ThreatCategory::CredentialExposure,
    )?;
    let exfil_scope =
        most_specific_scope_for_category(raw_root_cause_groups, ThreatCategory::DataExfiltration)?;
    // Attribute the compound finding to the more specific (most actionable)
    // of the two contributing scopes. Without this, evidence sitting in the
    // primary entrypoint was previously labelled `SupportingArtifact`,
    // confusing audit trails and scope-keyed suppressions.
    let scope = cred_scope.min(exfil_scope);

    // Trusted-host downgrade respect: when every actionable
    // DataExfiltration finding in `scope` is a trust-downgraded
    // taint match (sinks_trusted=true), the per-finding emission was
    // already moved to ReviewSignal — re-escalating to
    // MaliciousBehavior here defeats that downgrade. Drop to
    // ReviewSignal so the compound chain still surfaces the chain
    // shape but no longer auto-blocks. A SINGLE non-trust-downgraded
    // exfil finding defeats the downgrade and the compound stays at
    // MaliciousBehavior.
    let exfil_findings_in_scope: Vec<&Finding> = findings
        .iter()
        .filter(|f| {
            f.category == ThreatCategory::DataExfiltration
                && f.artifact_scope == scope
                && f.recommended_action != RecommendedAction::Log
        })
        .collect();
    let signal_class = if !exfil_findings_in_scope.is_empty()
        && exfil_findings_in_scope
            .iter()
            .all(|f| is_trust_downgraded_taint(f))
    {
        SignalClass::ReviewSignal
    } else {
        SignalClass::MaliciousBehavior
    };

    Some(VerdictReason {
        scope,
        category: ThreatCategory::DataExfiltration,
        signal_class,
        rationale: "Compound verdict: token or session access is paired with outbound transmission"
            .to_string(),
    })
}

/// `true` when `finding` is one of the trust-opt-in taint rules
/// (`TRUSTED_HOST_DOWNGRADE_TAINT_RULES`) AND its `match_value` carries
/// the `sinks_trusted=true` annotation emitted by
/// `artifact_taint::analysis::build_taint_finding` when every external
/// sink resolved to the API allowlist.
///
/// The annotation must appear as a standalone whitespace-delimited
/// token. `match_value` also embeds sink text taken from the scanned
/// artifact (e.g. `sink=https://…`), so a plain substring search would
/// let a sink such as `https://evil.example/?sinks_trusted=true` forge
/// the annotation and launder a real exfil finding into the downgrade.
///
/// NOTE(review): assumes the emitter writes `match_value` as
/// space-separated `key=value` fields, as this module's test fixtures
/// do — confirm against `build_taint_finding`. If the assumption is
/// wrong the check fails closed (no downgrade).
fn is_trust_downgraded_taint(finding: &Finding) -> bool {
    const TRUSTED_SINKS_ANNOTATION: &str = "sinks_trusted=true";

    if !TRUSTED_HOST_DOWNGRADE_TAINT_RULES.contains(&finding.rule_id.as_str()) {
        return false;
    }
    finding
        .match_value
        .split_whitespace()
        .any(|token| token == TRUSTED_SINKS_ANNOTATION)
}

/// Compound pattern: a package.json install hook together with a remote
/// fetch or execution surface.
///
/// Fires when `MANIFEST_PACKAGE_JSON_INSTALL_HOOK` has a non-`Log` finding
/// in the `PackageRootArtifact` scope and, in addition, either an
/// actionable `RemoteExec` group exists or
/// `OFFICIAL_REMOTE_FETCH_EXEC_POLYGLOT` has a non-`Log` finding. The
/// reason is attributed to `PackageRootArtifact` under `SupplyChain`.
fn detect_install_hook_with_exec_surface(
    findings: &[Finding],
    raw_root_cause_groups: &[RootCauseGroup],
) -> Option<VerdictReason> {
    let has_install_hook = compound_has_rule_in_scope(
        findings,
        "MANIFEST_PACKAGE_JSON_INSTALL_HOOK",
        ArtifactScope::PackageRootArtifact,
    );
    let hook_reaches_exec = has_install_hook
        && (compound_has_category(raw_root_cause_groups, ThreatCategory::RemoteExec)
            || compound_has_rule(findings, "OFFICIAL_REMOTE_FETCH_EXEC_POLYGLOT"));

    hook_reaches_exec.then(|| VerdictReason {
        scope: ArtifactScope::PackageRootArtifact,
        category: ThreatCategory::SupplyChain,
        signal_class: SignalClass::MaliciousBehavior,
        rationale: String::from(
            "Compound verdict: install hook is paired with remote fetch or execution",
        ),
    })
}

/// Compound pattern: broadly declared permissions together with high-risk
/// autonomy.
///
/// A permission set counts as broad when the entrypoint declares full
/// browser access, or shell execution, or OAuth scopes combined with
/// secrets access. The reason fires only if
/// `compound_has_high_risk_autonomy` also holds, and is attributed to
/// `AgentEntrypoint` under `AutonomyEscalation`.
fn detect_broad_permissions_with_autonomy(
    findings: &[Finding],
    raw_root_cause_groups: &[RootCauseGroup],
) -> Option<VerdictReason> {
    let declares = |rule_id: &str| compound_has_declared_permission_rule(findings, rule_id);

    let has_single_broad_grant = declares("DECLARED_PERMISSION_BROWSER_FULL")
        || declares("DECLARED_PERMISSION_SHELL_EXEC");
    let has_broad_permission_combo = has_single_broad_grant
        || (declares("DECLARED_PERMISSION_OAUTH_SCOPES")
            && declares("DECLARED_PERMISSION_SECRETS_ACCESS"));

    if !has_broad_permission_combo {
        return None;
    }
    if !compound_has_high_risk_autonomy(findings, raw_root_cause_groups) {
        return None;
    }

    Some(VerdictReason {
        scope: ArtifactScope::AgentEntrypoint,
        category: ThreatCategory::AutonomyEscalation,
        signal_class: SignalClass::MaliciousBehavior,
        rationale: String::from(
            "Compound verdict: broad permissions are paired with autonomous execution semantics",
        ),
    })
}

/// Compound pattern: heartbeat / long-poll fetching together with
/// credential access.
///
/// Fires when `SKILL_HEARTBEAT_REMOTE_POLL` has a non-`Log` finding and an
/// actionable `CredentialExposure` group exists. The reason is attributed
/// to `AgentEntrypoint` under `AutonomyEscalation`.
fn detect_heartbeat_poll_with_credential_read(
    findings: &[Finding],
    raw_root_cause_groups: &[RootCauseGroup],
) -> Option<VerdictReason> {
    // Why this pairing matters: a skill that pulls instructions on a fixed
    // cadence while already holding a token has the classic agent-C2 shape.
    // The operator gains remote command-and-control even though the skill
    // never trips an exec rule by itself.
    let polls_remote_host = compound_has_rule(findings, "SKILL_HEARTBEAT_REMOTE_POLL");
    let has_c2_shape = polls_remote_host
        && compound_has_category(raw_root_cause_groups, ThreatCategory::CredentialExposure);
    if !has_c2_shape {
        return None;
    }

    Some(VerdictReason {
        scope: ArtifactScope::AgentEntrypoint,
        category: ThreatCategory::AutonomyEscalation,
        signal_class: SignalClass::MaliciousBehavior,
        rationale: String::from(
            "Compound verdict: heartbeat polling is paired with credential or token access",
        ),
    })
}

/// Compound pattern: an MCP remote server endpoint together with an
/// execution or tooling-transport declaration.
///
/// Fires when `MCP_REMOTE_SERVER_ENDPOINT` has a non-`Log` finding in the
/// `PackageRootArtifact` scope and either `MCP_REMOTE_EXEC_SURFACE` or
/// `MCP_TOOLING_TRANSPORT_DECLARED` has a non-`Log` finding in any scope.
/// The reason is attributed to `PackageRootArtifact` under `RemoteExec`.
/// `_raw_root_cause_groups` is unused; it is accepted so all detectors
/// share one signature.
fn detect_mcp_remote_endpoint_with_exec(
    findings: &[Finding],
    _raw_root_cause_groups: &[RootCauseGroup],
) -> Option<VerdictReason> {
    let exposes_remote_endpoint = compound_has_rule_in_scope(
        findings,
        "MCP_REMOTE_SERVER_ENDPOINT",
        ArtifactScope::PackageRootArtifact,
    );
    if !exposes_remote_endpoint {
        return None;
    }

    let exec_rules = ["MCP_REMOTE_EXEC_SURFACE", "MCP_TOOLING_TRANSPORT_DECLARED"];
    let has_exec_semantics = exec_rules
        .iter()
        .any(|rule_id| compound_has_rule(findings, rule_id));

    has_exec_semantics.then(|| VerdictReason {
        scope: ArtifactScope::PackageRootArtifact,
        category: ThreatCategory::RemoteExec,
        signal_class: SignalClass::MaliciousBehavior,
        rationale: String::from(
            "Compound verdict: MCP remote endpoint is paired with command or stdio execution semantics",
        ),
    })
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::findings::Finding;
    use crate::findings::{ArtifactKind, EvidenceKind, MatchTarget, Severity};

    /// Builds an entrypoint-scoped `DataExfiltration` finding for `rule_id`.
    ///
    /// With `sinks_trusted` set, the finding carries the
    /// `sinks_trusted=true` annotation and the already-downgraded
    /// per-finding action / signal class; otherwise it is a plain blocking,
    /// malicious finding without the annotation.
    fn taint_finding(rule_id: &str, sinks_trusted: bool) -> Finding {
        let (suffix, recommended_action, signal_class) = if sinks_trusted {
            (
                " sinks_trusted=true",
                RecommendedAction::RequireApproval,
                SignalClass::ReviewSignal,
            )
        } else {
            (
                "",
                RecommendedAction::Block,
                SignalClass::MaliciousBehavior,
            )
        };
        let match_value =
            format!("family=exfil source=secret_access sink=https://api.openai.com/v1{suffix}");

        Finding {
            rule_id: rule_id.to_string(),
            category: ThreatCategory::DataExfiltration,
            severity: Severity::Critical,
            confidence: 0.9,
            raw_confidence: 0.9,
            confidence_rationale: String::new(),
            matched_on: MatchTarget::ReferencedFile {
                path: "SKILL.md".to_string(),
            },
            match_value,
            reason: String::new(),
            remediation: String::new(),
            recommended_action,
            evidence_kind: EvidenceKind::Behavior,
            artifact_kind: ArtifactKind::SkillDocument,
            artifact_scope: ArtifactScope::AgentEntrypoint,
            signal_class,
            artifact_path: Some("SKILL.md".to_string()),
            operational_contexts: Vec::new(),
            line_number: None,
            suppression: None,
        }
    }

    /// Builds a single-finding, review-level, approval-requiring group in
    /// the entrypoint scope for the given category and representative rule.
    fn entrypoint_review_group(category: ThreatCategory, representative_rule: &str) -> RootCauseGroup {
        RootCauseGroup {
            scope: ArtifactScope::AgentEntrypoint,
            category,
            signal_class: SignalClass::ReviewSignal,
            finding_count: 1,
            strongest_action: RecommendedAction::RequireApproval,
            representative_rules: vec![representative_rule.to_string()],
        }
    }

    /// Actionable credential-exposure group in the entrypoint scope.
    fn cred_group() -> RootCauseGroup {
        entrypoint_review_group(ThreatCategory::CredentialExposure, "SKILL_SECRETS_DIR_WRITE")
    }

    /// Actionable data-exfiltration group in the entrypoint scope.
    fn exfil_group() -> RootCauseGroup {
        entrypoint_review_group(
            ThreatCategory::DataExfiltration,
            "ARTIFACT_TAINT_SECRET_TO_EXTERNAL_NETWORK",
        )
    }

    /// Runs the credential-exfil chain against `findings` with one
    /// credential group and one exfil group, and returns the signal class
    /// of the emitted reason. Panics if the chain emits nothing.
    fn chain_signal_class(findings: &[Finding]) -> SignalClass {
        let groups = [cred_group(), exfil_group()];
        detect_credential_exfil_chain(findings, &groups)
            .expect("chain should still emit a verdict reason")
            .signal_class
    }

    /// Contract: if EVERY in-scope DataExfiltration finding is a
    /// trust-downgraded taint (`sinks_trusted=true`), the compound
    /// credential-exfil chain reports ReviewSignal rather than
    /// MaliciousBehavior, so the per-finding downgrade is not
    /// re-escalated by the compound layer.
    #[test]
    fn credential_exfil_chain_respects_trust_downgrade() {
        let findings = [taint_finding(
            "ARTIFACT_TAINT_SECRET_TO_EXTERNAL_NETWORK",
            true,
        )];
        assert_eq!(
            chain_signal_class(&findings),
            SignalClass::ReviewSignal,
            "trust-downgraded taint must downgrade compound chain to ReviewSignal"
        );
    }

    /// Contract (negative): one taint finding without the trust
    /// annotation is enough to cancel the downgrade. The chain stays at
    /// MaliciousBehavior, so a real exfil signal cannot be laundered by
    /// pairing it with a trusted-host call.
    #[test]
    fn credential_exfil_chain_one_untrusted_defeats_downgrade() {
        let findings = [
            taint_finding("ARTIFACT_TAINT_SECRET_TO_EXTERNAL_NETWORK", true),
            taint_finding("ARTIFACT_TAINT_SECRET_TO_EXTERNAL_NETWORK", false),
        ];
        assert_eq!(
            chain_signal_class(&findings),
            SignalClass::MaliciousBehavior,
            "one untrusted taint sink must keep compound chain at MaliciousBehavior"
        );
    }

    /// Contract: with no DataExfiltration finding in the attributed
    /// scope (here: no findings at all), the chain still reports
    /// MaliciousBehavior. The downgrade applies only when taint findings
    /// are actually present and ALL of them are trust-downgraded.
    #[test]
    fn credential_exfil_chain_no_in_scope_findings_stays_malicious() {
        assert_eq!(
            chain_signal_class(&[]),
            SignalClass::MaliciousBehavior,
            "no in-scope exfil findings must keep chain at MaliciousBehavior"
        );
    }
}