pub mod evaluate;
pub mod render;
use std::collections::BTreeMap;
use std::path::{Path, PathBuf};
use anyhow::{anyhow, Context};
use serde::Serialize;
use serde_yaml::Value as YamlValue;
pub use evaluate::{evaluate_corpus, DecisionLine, EvalOptions};
/// Settings for a single invocation of [`run_diff_mode`].
#[derive(Debug, Clone)]
pub struct DiffOptions {
/// Ruleset file used as the "before" side: parsed for the structural rule
/// diff and handed to `evaluate_corpus` for the behavioral comparison.
pub rules_before: PathBuf,
/// Ruleset file used as the "after" side; handled the same way as
/// `rules_before`.
pub rules_after: PathBuf,
/// Corpus file to evaluate. When `None`, the corpus is read from stdin.
pub corpus: Option<PathBuf>,
/// Forwarded unchanged into `EvalOptions::workspace`.
pub workspace: Option<PathBuf>,
/// Selects which renderer produces the report.
pub format: OutputFormat,
/// Forwarded to the text and Markdown renderers (the JSON renderer does not
/// receive it). Presumably caps how many sample inputs are printed; confirm
/// against the `render` module.
pub max_samples: usize,
/// When set, the run reports exit code 1 if any corpus line changed decision.
pub fail_if_flipped: bool,
/// When set, the run reports exit code 1 if any flip moved to a lower
/// severity rank.
pub fail_if_loosened: bool,
/// When `Some(threshold)`, the run reports exit code 1 if the number of
/// flips ending in `"allow"` is strictly greater than `threshold`.
pub fail_if_allows_loosened: Option<usize>,
}
/// Report format selected by the `--format` option.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OutputFormat {
/// Report produced by `render::render_text`.
Text,
/// Report produced by `render::render_markdown`.
Markdown,
/// Report produced by `render::render_json`.
Json,
}
impl OutputFormat {
    /// Converts a `--format` argument into an [`OutputFormat`].
    ///
    /// Accepted spellings are `text`, `markdown` (or its alias `md`) and
    /// `json`; matching is exact and case-sensitive.
    ///
    /// # Errors
    ///
    /// Returns an error naming the rejected value for any other input.
    pub fn parse(s: &str) -> anyhow::Result<Self> {
        let format = match s {
            "text" => Self::Text,
            "md" | "markdown" => Self::Markdown,
            "json" => Self::Json,
            unrecognized => {
                anyhow::bail!(
                    "unknown --format '{}': must be one of text|markdown|json",
                    unrecognized
                )
            }
        };
        Ok(format)
    }
}
/// Per-rule comparison record: the structural change between two rulesets plus
/// behavioral counters filled in from corpus evaluation.
#[derive(Debug, Clone, Serialize)]
pub struct RuleDelta {
/// Identifier of the rule this record describes.
pub rule_id: String,
/// Structural status assigned by `diff_rulesets`: `"removed"`, `"added"`,
/// `"unchanged"` or `"modified"`.
pub status: String,
/// Textual diff of the rule's YAML; empty for unchanged rules, in which case
/// it is left out of the serialized output.
#[serde(skip_serializing_if = "String::is_empty")]
pub yaml_diff: String,
/// Number of paired corpus lines whose "before" evaluation matched this rule.
pub fires_before: usize,
/// Number of paired corpus lines whose "after" evaluation matched this rule.
pub fires_after: usize,
/// Flipped corpus lines attributed to this rule, stored as
/// `(decision before, decision after, input)`. Never serialized.
#[serde(skip_serializing)]
pub flipped_lines_caused: Vec<(String, String, serde_json::Value)>,
}
/// Counts of corpus lines whose decision changed, keyed by
/// `(decision before, decision after)`.
pub type FlipCounter = BTreeMap<(String, String), usize>;
/// Decision labels that are pre-seeded with a zero count in the before/after
/// decision tallies built by `run_diff_mode`.
// NOTE(review): `severity_rank` also recognizes "identity_verification", which
// is not listed here — confirm whether that omission is intentional.
pub const DECISIONS: [&str; 4] = ["allow", "warn", "approval", "block"];
/// Maps a decision label to an ordinal severity; a larger value is stricter.
///
/// `"approval"` and `"identity_verification"` share a rank. Any label that is
/// not listed ranks as `99`, i.e. above every known decision.
fn severity_rank(d: &str) -> u8 {
    const UNKNOWN_RANK: u8 = 99;
    const KNOWN_RANKS: [(&str, u8); 5] = [
        ("allow", 0),
        ("warn", 1),
        ("approval", 2),
        ("identity_verification", 2),
        ("block", 3),
    ];
    KNOWN_RANKS
        .iter()
        .find(|(label, _)| *label == d)
        .map_or(UNKNOWN_RANK, |&(_, rank)| rank)
}
/// Sums the counts of every flip whose "after" decision has a strictly lower
/// severity rank than its "before" decision.
pub fn loosening_count(flips: &FlipCounter) -> usize {
    let mut loosened = 0;
    for ((from, to), count) in flips {
        if severity_rank(to) < severity_rank(from) {
            loosened += *count;
        }
    }
    loosened
}
/// Sums the counts of every flip whose "after" decision is exactly `"allow"`,
/// whatever the "before" decision was.
pub fn flips_to_allow(flips: &FlipCounter) -> usize {
    flips
        .iter()
        .filter_map(|((_, after), count)| (after == "allow").then_some(*count))
        .sum()
}
pub fn load_ruleset_yaml(
path: &Path,
) -> anyhow::Result<BTreeMap<String, YamlValue>> {
let raw = std::fs::read_to_string(path)
.with_context(|| format!("reading shieldset YAML from {}", path.display()))?;
let root: YamlValue = serde_yaml::from_str(&raw)
.with_context(|| format!("parsing YAML at {}", path.display()))?;
let YamlValue::Mapping(top) = &root else {
anyhow::bail!("{} did not parse as a YAML mapping", path.display());
};
let shieldset = top
.get(YamlValue::String("shieldset".into()))
.unwrap_or(&root);
let rules = match shieldset {
YamlValue::Mapping(m) => m.get(YamlValue::String("rules".into())).cloned(),
_ => None,
};
let Some(YamlValue::Sequence(rules)) = rules else {
return Ok(BTreeMap::new());
};
let mut out: BTreeMap<String, YamlValue> = BTreeMap::new();
for r in rules {
let YamlValue::Mapping(mut m) = r else { continue };
let Some(YamlValue::String(rid)) = m.remove(YamlValue::String("id".into())) else {
continue;
};
out.insert(rid, YamlValue::Mapping(m));
}
Ok(out)
}
/// Serializes one rule as a single-element YAML sequence, with `id` as the
/// first key followed by the entries of `body`.
///
/// A `body` that is not a mapping contributes no keys. If serialization
/// fails, an empty string is returned.
pub fn yaml_dump_rule(rid: &str, body: &YamlValue) -> String {
    let mut fields = serde_yaml::Mapping::new();
    fields.insert(YamlValue::String("id".into()), YamlValue::String(rid.into()));
    if let YamlValue::Mapping(rest) = body {
        fields.extend(rest.iter().map(|(key, value)| (key.clone(), value.clone())));
    }
    let document = YamlValue::Sequence(vec![YamlValue::Mapping(fields)]);
    match serde_yaml::to_string(&document) {
        Ok(text) => text,
        Err(_) => String::new(),
    }
}
/// Compares two rulesets rule by rule and returns one [`RuleDelta`] per rule
/// id found in either of them.
///
/// Status and diff text per rule:
/// - `"removed"` / `"added"`: every line of the dumped rule prefixed with
///   `"- "` / `"+ "`.
/// - `"unchanged"`: empty diff.
/// - `"modified"`: a `---`/`+++` header followed by every line of both dumps,
///   prefixed with `-`, `+` or a space.
///
/// The firing counters start at zero and the flipped-line list starts empty.
pub fn diff_rulesets(
    before: &BTreeMap<String, YamlValue>,
    after: &BTreeMap<String, YamlValue>,
) -> BTreeMap<String, RuleDelta> {
    use similar::{ChangeTag, TextDiff};

    /// Prefixes each line of `text` with `marker` and a space; lines are
    /// joined without a trailing newline.
    fn prefix_lines(text: &str, marker: &str) -> String {
        text.lines()
            .map(|line| format!("{} {}", marker, line))
            .collect::<Vec<_>>()
            .join("\n")
    }

    let ids: std::collections::BTreeSet<&String> = before.keys().chain(after.keys()).collect();

    ids.into_iter()
        .map(|rid| {
            let (status, yaml_diff) = match (before.get(rid), after.get(rid)) {
                (Some(old), None) => ("removed", prefix_lines(&yaml_dump_rule(rid, old), "-")),
                (None, Some(new)) => ("added", prefix_lines(&yaml_dump_rule(rid, new), "+")),
                (Some(old), Some(new)) if old == new => ("unchanged", String::new()),
                (Some(old), Some(new)) => {
                    let old_yaml = yaml_dump_rule(rid, old);
                    let new_yaml = yaml_dump_rule(rid, new);
                    let diff = TextDiff::from_lines(&old_yaml, &new_yaml);
                    let mut text = format!("--- {}.before\n+++ {}.after\n", rid, rid);
                    for change in diff.iter_all_changes() {
                        text.push(match change.tag() {
                            ChangeTag::Delete => '-',
                            ChangeTag::Insert => '+',
                            ChangeTag::Equal => ' ',
                        });
                        // Each change value carries its own line terminator.
                        text.push_str(change.value());
                    }
                    ("modified", text)
                }
                (None, None) => unreachable!("every id was taken from one of the two rulesets"),
            };
            let delta = RuleDelta {
                rule_id: rid.clone(),
                status: status.to_string(),
                yaml_diff,
                fires_before: 0,
                fires_after: 0,
                flipped_lines_caused: Vec::new(),
            };
            (rid.clone(), delta)
        })
        .collect()
}
pub fn populate_behavior(
deltas: &mut BTreeMap<String, RuleDelta>,
before: &[DecisionLine],
after: &[DecisionLine],
) -> FlipCounter {
if before.len() != after.len() {
eprintln!(
"warn: decision counts differ ({} vs {}); pairing by index",
before.len(),
after.len()
);
}
let n = before.len().min(after.len());
let mut flips: FlipCounter = BTreeMap::new();
for i in 0..n {
let b = &before[i];
let a = &after[i];
for rid in &b.matched_rules {
if let Some(d) = deltas.get_mut(rid) {
d.fires_before += 1;
}
}
for rid in &a.matched_rules {
if let Some(d) = deltas.get_mut(rid) {
d.fires_after += 1;
}
}
if b.decision != a.decision {
*flips
.entry((b.decision.clone(), a.decision.clone()))
.or_insert(0) += 1;
for rid in &a.matched_rules {
if let Some(d) = deltas.get_mut(rid) {
if matches!(d.status.as_str(), "added" | "modified") {
d.flipped_lines_caused.push((
b.decision.clone(),
a.decision.clone(),
b.input.clone(),
));
}
}
}
for rid in &b.matched_rules {
if let Some(d) = deltas.get_mut(rid) {
if d.status == "removed" {
d.flipped_lines_caused.push((
b.decision.clone(),
a.decision.clone(),
b.input.clone(),
));
}
}
}
}
}
flips
}
pub async fn run_diff_mode(opts: DiffOptions) -> anyhow::Result<i32> {
let before_yaml = load_ruleset_yaml(&opts.rules_before)?;
let after_yaml = load_ruleset_yaml(&opts.rules_after)?;
let corpus_bytes = read_corpus(opts.corpus.as_deref())?;
if corpus_bytes.trim().is_empty() {
anyhow::bail!("corpus is empty");
}
let corpus_line_count = corpus_bytes
.lines()
.filter(|l| {
let t = l.trim();
!t.is_empty() && !t.starts_with('#') && !t.starts_with("//")
})
.count();
let eval_opts = EvalOptions {
workspace: opts.workspace.clone(),
};
let before_decisions = evaluate_corpus(&opts.rules_before, &corpus_bytes, &eval_opts)?;
let after_decisions = evaluate_corpus(&opts.rules_after, &corpus_bytes, &eval_opts)?;
let mut decision_before: BTreeMap<String, usize> = BTreeMap::new();
for d in DECISIONS {
decision_before.insert(d.into(), 0);
}
for d in &before_decisions {
*decision_before.entry(d.decision.clone()).or_insert(0) += 1;
}
let mut decision_after: BTreeMap<String, usize> = BTreeMap::new();
for d in DECISIONS {
decision_after.insert(d.into(), 0);
}
for d in &after_decisions {
*decision_after.entry(d.decision.clone()).or_insert(0) += 1;
}
let mut deltas = diff_rulesets(&before_yaml, &after_yaml);
let flips = populate_behavior(&mut deltas, &before_decisions, &after_decisions);
let before_label = opts.rules_before.display().to_string();
let after_label = opts.rules_after.display().to_string();
let out = match opts.format {
OutputFormat::Text => render::render_text(
&before_label,
&after_label,
corpus_line_count,
&decision_before,
&decision_after,
&deltas,
&flips,
opts.max_samples,
),
OutputFormat::Markdown => render::render_markdown(
&before_label,
&after_label,
corpus_line_count,
&decision_before,
&decision_after,
&deltas,
&flips,
opts.max_samples,
),
OutputFormat::Json => render::render_json(
&before_label,
&after_label,
corpus_line_count,
&decision_before,
&decision_after,
&deltas,
&flips,
),
};
print!("{}", out);
if !out.ends_with('\n') {
println!();
}
let total_flipped: usize = flips.values().sum();
if let Some(threshold) = opts.fail_if_allows_loosened {
if flips_to_allow(&flips) > threshold {
return Ok(1);
}
}
if opts.fail_if_loosened && loosening_count(&flips) > 0 {
return Ok(1);
}
if opts.fail_if_flipped && total_flipped > 0 {
return Ok(1);
}
Ok(0)
}
fn read_corpus(path: Option<&Path>) -> anyhow::Result<String> {
use std::io::Read;
if let Some(p) = path {
return std::fs::read_to_string(p)
.with_context(|| format!("reading corpus from {}", p.display()));
}
if atty_stdin() {
anyhow::bail!(
"no corpus on stdin and no --corpus PATH given.\n\
hint: aperion-shield --diff --corpus tests/corpus/golden.jsonl \
--rules-before X --rules-after Y"
);
}
let mut buf = String::new();
std::io::stdin().read_to_string(&mut buf)?;
Ok(buf)
}
/// Reports whether standard input is attached to a terminal.
///
/// Uses the standard library's terminal detection on every platform, so a
/// piped or redirected stdin is recognized on non-unix targets as well.
fn atty_stdin() -> bool {
    use std::io::IsTerminal;

    std::io::stdin().is_terminal()
}