Skip to main content

aperion_shield/diff/
evaluate.rs

1//! In-process corpus evaluation.
2//!
3//! Equivalent to `aperion-shield --check`, but stripped down for the
4//! diff use case:
5//!
6//!   * memory and burst-detector are **disabled** (`--no-memory`,
7//!     `--no-burst` equivalents). Both are stateful, so flipping them
8//!     on would make the second engine's evaluation depend on the
9//!     first engine's history and give us non-reproducible diffs.
10//!     The Python prototype does the same thing.
11//!   * Workspace context is still computed once and shared between
12//!     the two runs (it's a function of `--workspace`, not the rules).
13//!   * Output is in-process structs, not serialised JSON, so no
14//!     parse round-trip cost on big corpora.
15//!
16//! Output schema mirrors the JSON emitted by `--check` for the
17//! fields the diff explainer actually consumes. If new fields are
18//! added to `--check`'s JSON output, mirror them here only if the
19//! diff explainer needs them; otherwise we accumulate stale fields
20//! that confuse readers.
21
22use std::path::{Path, PathBuf};
23
24use anyhow::{anyhow, Context};
25use serde::Serialize;
26use serde_json::{json, Value};
27
28use crate::engine::{decide, Adjustments, Decision, Engine};
29use crate::WorkspaceContext;
30
/// Settings shared by the "before" and "after" engine runs of a diff.
#[derive(Clone, Debug, Default)]
pub struct EvalOptions {
    /// Workspace root handed to the prod-probe, with the same semantics
    /// as `--check --workspace PATH`. When `None`, the probe is invoked
    /// without an explicit path.
    pub workspace: Option<PathBuf>,
}
38
39/// One evaluation result, mirroring the JSON shape `--check` writes
40/// per line. Names are kept stable with the Python prototype's
41/// `DecisionLine` so the JSON output schema stays source-compatible.
42#[derive(Debug, Clone, Serialize)]
43pub struct DecisionLine {
44    pub decision: String,
45    pub primary_rule_id: Option<String>,
46    pub matched_rules: Vec<String>,
47    pub raw_severity: String,
48    pub composite_severity: String,
49    pub composite_points: u32,
50    pub reason: String,
51    pub input: Value,
52}
53
54/// Run the engine at `rules_path` over the JSON-Lines corpus,
55/// returning one [`DecisionLine`] per non-blank, non-comment input
56/// line in order. Invalid JSON lines map to an "allow" decision with
57/// a sentinel `reason` so the index pairing in the diff stays
58/// aligned with the corpus.
59pub fn evaluate_corpus(
60    rules_path: &Path,
61    corpus: &str,
62    opts: &EvalOptions,
63) -> anyhow::Result<Vec<DecisionLine>> {
64    let raw = std::fs::read_to_string(rules_path).with_context(|| {
65        format!("reading shieldset for evaluation from {}", rules_path.display())
66    })?;
67    let engine = Engine::from_yaml(&raw)
68        .with_context(|| format!("loading shieldset from {}", rules_path.display()))?;
69
70    // Workspace probe is shared across runs. Adaptive memory and
71    // burst detector are intentionally disabled for diff -- see the
72    // module-level docs for why.
73    let workspace = {
74        let mut policy = engine.policy.clone();
75        // Workspace probe stays enabled regardless of the engine's
76        // policy block: the diff explainer evaluates a *static*
77        // corpus, so we keep all signals that depend on inputs
78        // visible. The probe itself is deterministic for a given
79        // --workspace path.
80        policy.workspace_probe.enabled = true;
81        match &opts.workspace {
82            Some(p) => WorkspaceContext::probe_at(&policy, p),
83            None => WorkspaceContext::probe(&policy),
84        }
85    };
86
87    let mut out = Vec::new();
88    for raw_line in corpus.lines() {
89        let trimmed = raw_line.trim();
90        if trimmed.is_empty() || trimmed.starts_with('#') || trimmed.starts_with("//") {
91            continue;
92        }
93        let input: Value = match serde_json::from_str::<Value>(trimmed) {
94            Ok(v) => v,
95            Err(_) => {
96                out.push(DecisionLine {
97                    decision: "allow".into(),
98                    primary_rule_id: None,
99                    matched_rules: Vec::new(),
100                    raw_severity: "allow".into(),
101                    composite_severity: "allow".into(),
102                    composite_points: 0,
103                    reason: "invalid JSON in corpus line".into(),
104                    input: json!({"_raw": trimmed}),
105                });
106                continue;
107            }
108        };
109
110        let adj = Adjustments {
111            workspace_is_prod: workspace.is_prod,
112            ..Default::default()
113        };
114
115        // Two input shapes: text (llm_response scope) or tool-call.
116        // Identical to run_check_mode in src/main.rs.
117        let eval = if let Some(text) = input.get("text").and_then(|v| v.as_str()) {
118            engine.evaluate_text(text, adj)
119        } else {
120            let tool = input.get("tool").and_then(|v| v.as_str()).unwrap_or("");
121            let params = input.get("params").cloned().unwrap_or(Value::Null);
122            let canonical = if params.get("name").is_some() || params.get("arguments").is_some()
123            {
124                params.clone()
125            } else {
126                json!({ "name": tool, "arguments": params })
127            };
128            engine.evaluate(tool, &canonical, adj)
129        };
130
131        let decision = decide(&eval);
132        let label = decision.label().to_string();
133        let (primary_rule_id, reason) = match &decision {
134            Decision::Block { rule_id, reason, .. }
135            | Decision::Approval { rule_id, reason, .. }
136            | Decision::IdentityVerification { rule_id, reason, .. } => {
137                (Some(rule_id.clone()), reason.clone())
138            }
139            Decision::Warn { rule_id, banner, .. } => (Some(rule_id.clone()), banner.clone()),
140            Decision::Allow => (None, String::new()),
141        };
142
143        out.push(DecisionLine {
144            decision: label,
145            primary_rule_id,
146            matched_rules: eval.matches.iter().map(|m| m.rule_id.clone()).collect(),
147            raw_severity: eval.raw_severity.as_str().into(),
148            composite_severity: eval.composite_severity.as_str().into(),
149            composite_points: eval.composite_points,
150            reason,
151            input,
152        });
153    }
154    Ok(out)
155}
156
157/// Validate the rules path early so we can fail with a clearer error
158/// than "reading shieldset failed". Used by `run_diff_mode` when both
159/// paths are checked up-front.
160#[allow(dead_code)]
161pub fn ensure_rules_exists(p: &Path) -> anyhow::Result<()> {
162    if !p.is_file() {
163        return Err(anyhow!(
164            "shieldset not found at {} -- check the --rules-before / --rules-after paths",
165            p.display()
166        ));
167    }
168    Ok(())
169}