#![allow(clippy::field_reassign_with_default)]
use regex::Regex;
use serde::Deserialize;
use std::collections::{BTreeSet, HashMap};
use std::fs;
use std::path::{Path, PathBuf};
/// Kind of source backing a rule's evidence.
///
/// Deserialized via serde from snake_case strings, so for example
/// `VendorDocs` is read from `"vendor_docs"`.
#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SourceType {
/// Read from `"spec"`.
Spec,
/// Read from `"vendor_docs"`.
VendorDocs,
/// Read from `"vendor_code"`.
VendorCode,
/// Read from `"paper"`.
Paper,
/// Read from `"community"`.
Community,
}
/// Normative strength attached to a rule's evidence.
///
/// Deserialized via serde from SCREAMING_SNAKE_CASE strings, so for example
/// `BestPractice` is read from `"BEST_PRACTICE"`.
#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
pub enum NormativeLevel {
/// Read from `"MUST"`.
Must,
/// Read from `"SHOULD"`.
Should,
/// Read from `"BEST_PRACTICE"`.
BestPractice,
}
/// Optional scoping information for a rule's evidence.
///
/// Every field is optional; a key missing from the JSON deserializes to `None`.
#[derive(Debug, Clone, Default, Deserialize)]
pub struct AppliesTo {
/// Tool name; `test_applies_to_tool_values` checks it against
/// `agnix_rules::valid_tools()` when present.
#[serde(default)]
pub tool: Option<String>,
/// Free-form version range string (not validated in this file).
#[serde(default)]
pub version_range: Option<String>,
/// Free-form spec revision string (not validated in this file).
#[serde(default)]
pub spec_revision: Option<String>,
}
/// Test-coverage flags declared for a rule in its evidence metadata.
///
/// All three keys are required when deserializing.
#[derive(Debug, Clone, Deserialize)]
pub struct TestCoverage {
/// Declared `unit` flag (not cross-checked in this file).
pub unit: bool,
/// Declared `fixtures` flag; `test_evidence_test_coverage_accuracy` fails
/// when this is `true` but no fixture coverage is found for the rule.
pub fixtures: bool,
/// Declared `e2e` flag (not cross-checked in this file).
pub e2e: bool,
}
/// Evidence metadata attached to each rule entry in `rules.json`.
#[derive(Debug, Clone, Deserialize)]
pub struct Evidence {
/// Kind of source the rule is derived from.
pub source_type: SourceType,
/// Source URLs; the tests in this file require the list to be non-empty
/// and each entry to match `^https?://[^\s]+$`.
pub source_urls: Vec<String>,
/// Verification date; the tests in this file require the `YYYY-MM-DD` shape.
pub verified_on: String,
/// Optional tool / version / spec-revision scoping.
pub applies_to: AppliesTo,
/// Normative strength of the rule.
pub normative_level: NormativeLevel,
/// Declared test coverage flags.
pub tests: TestCoverage,
}
/// Auto-fix metadata attached to each rule entry in `rules.json`.
#[derive(Debug, Clone, Deserialize)]
pub struct FixMetadata {
/// Whether the rule declares an automatic fix.
pub autofix: bool,
/// Declared fix safety; `None` when the key is absent.
/// `test_all_rules_have_fix_metadata` requires one of `"safe"`, `"unsafe"`
/// or `"safe/unsafe"` when `autofix` is true, and `None` otherwise.
#[serde(default)]
pub fix_safety: Option<String>,
}
/// Top-level shape of `knowledge-base/rules.json` as read by these tests.
#[derive(Debug, Deserialize)]
struct RulesIndex {
/// Declared rule count; `test_rules_json_integrity` compares it with `rules.len()`.
total_rules: usize,
/// All rule entries.
rules: Vec<RuleEntry>,
}
/// A single rule entry from `rules.json`.
#[derive(Debug, Deserialize)]
struct RuleEntry {
/// Rule identifier (e.g. the tests reference `COP-001`).
id: String,
/// Rule name; required in the JSON but never read by the tests,
/// hence the `dead_code` allowance.
#[allow(dead_code)]
name: String,
/// Severity string; the tests accept `HIGH`, `MEDIUM` or `LOW`.
severity: String,
/// Category string; validated against a fixed list in `test_rules_json_integrity`.
category: String,
/// Evidence metadata for the rule.
evidence: Evidence,
/// Auto-fix metadata for the rule.
fix: FixMetadata,
}
fn workspace_root() -> &'static Path {
use std::sync::OnceLock;
static ROOT: OnceLock<PathBuf> = OnceLock::new();
ROOT.get_or_init(|| {
let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
for ancestor in manifest_dir.ancestors() {
let cargo_toml = ancestor.join("Cargo.toml");
if let Ok(content) = fs::read_to_string(&cargo_toml)
&& (content.contains("[workspace]") || content.contains("[workspace."))
{
return ancestor.to_path_buf();
}
}
panic!(
"Failed to locate workspace root from CARGO_MANIFEST_DIR={}",
manifest_dir.display()
);
})
.as_path()
}
/// Reads and deserializes `knowledge-base/rules.json` from the workspace root.
///
/// # Panics
///
/// Panics with the offending path when the file cannot be read or parsed.
fn load_rules_json() -> RulesIndex {
    let rules_path = workspace_root().join("knowledge-base/rules.json");

    let content = match fs::read_to_string(&rules_path) {
        Ok(text) => text,
        Err(e) => panic!("Failed to read {}: {}", rules_path.display(), e),
    };

    match serde_json::from_str::<RulesIndex>(&content) {
        Ok(index) => index,
        Err(e) => panic!("Failed to parse {}: {}", rules_path.display(), e),
    }
}
fn extract_sarif_rule_ids() -> BTreeSet<String> {
let rules_index = load_rules_json();
rules_index.rules.iter().map(|r| r.id.clone()).collect()
}
/// Collects rule IDs that appear as quoted string literals in the core crate.
///
/// Scans every `.rs` file under `crates/agnix-core/src/rules` (recursively)
/// plus the `.rs` files directly inside `crates/agnix-core/src`. A quoted
/// token counts only if it matches the ID pattern and starts with one of the
/// known rule prefixes.
///
/// # Panics
///
/// Panics when a directory cannot be listed. Files that cannot be read as
/// UTF-8 text are skipped silently.
fn extract_implemented_rule_ids() -> BTreeSet<String> {
    const KNOWN_PREFIXES: &[&str] = &[
        "AS-", "CC-SK-", "CC-HK-", "CC-AG-", "CC-MEM-", "CC-OS-", "CC-PL-", "CC-SET-", "AGM-",
        "MCP-", "COP-", "CUR-", "CLN-", "CDX-", "OC-", "GM-", "GM-AG-", "XML-", "REF-", "PE-",
        "XP-", "VER-", "WS-", "CR-SK-", "CL-SK-", "CP-SK-", "CX-SK-", "OC-SK-", "WS-SK-", "KR-SK-",
        "KR-AG-", "KR-HK-", "KR-PW-", "KR-MCP-", "KR-SET-", "KIRO-", "AMP-SK-", "AMP-", "RC-SK-",
        "ROO-",
    ];

    /// Adds every known-prefix rule ID quoted in `file` to `found`.
    fn collect_ids(
        file: &Path,
        pattern: &Regex,
        prefixes: &[&str],
        found: &mut BTreeSet<String>,
    ) {
        let Ok(text) = fs::read_to_string(file) else {
            return;
        };
        found.extend(pattern.captures_iter(&text).filter_map(|caps| {
            let id = &caps[1];
            prefixes
                .iter()
                .any(|prefix| id.starts_with(prefix))
                .then(|| id.to_string())
        }));
    }

    /// Descends through `dir`, harvesting IDs from each `.rs` file found.
    fn walk(dir: &Path, pattern: &Regex, prefixes: &[&str], found: &mut BTreeSet<String>) {
        let listing = match fs::read_dir(dir) {
            Ok(listing) => listing,
            Err(e) => panic!("Failed to read rules directory {}: {}", dir.display(), e),
        };
        for item in listing {
            let child = item.expect("Failed to read directory entry").path();
            if child.is_dir() {
                walk(&child, pattern, prefixes, found);
            } else if matches!(child.extension(), Some(ext) if ext == "rs") {
                collect_ids(&child, pattern, prefixes, found);
            }
        }
    }

    let core_src = workspace_root().join("crates/agnix-core/src");
    let pattern = Regex::new(r#""([A-Z]+-(?:[A-Z]+-)?[0-9]+)""#).unwrap();
    let mut found = BTreeSet::new();

    walk(&core_src.join("rules"), &pattern, KNOWN_PREFIXES, &mut found);

    // Top-level source files are scanned too, but without recursion.
    let top_level = match fs::read_dir(&core_src) {
        Ok(listing) => listing,
        Err(e) => panic!("Failed to read core src dir {}: {}", core_src.display(), e),
    };
    for item in top_level {
        let candidate = item.expect("Failed to read directory entry").path();
        if matches!(candidate.extension(), Some(ext) if ext == "rs") {
            collect_ids(&candidate, &pattern, KNOWN_PREFIXES, &mut found);
        }
    }

    found
}
/// Maps rule IDs to the fixture files under `tests/fixtures` that mention them.
///
/// A file counts as covering a rule when the ID pattern matches either the
/// file's text (case-sensitive) or its upper-cased file name. The same path
/// may appear more than once for a rule if the ID occurs several times.
/// Returns an empty map when the fixtures directory does not exist.
///
/// # Panics
///
/// Panics, naming the directory, when a directory or one of its entries
/// cannot be read. Files that are not valid UTF-8 text are matched by name only.
fn scan_fixtures_for_coverage() -> HashMap<String, Vec<String>> {
    fn scan_dir_recursive(dir: &Path, re: &Regex, coverage: &mut HashMap<String, Vec<String>>) {
        if !dir.is_dir() {
            return;
        }
        let entries = fs::read_dir(dir).unwrap_or_else(|e| {
            panic!("Failed to read fixtures directory {}: {}", dir.display(), e)
        });
        for entry in entries {
            let path = entry
                .unwrap_or_else(|e| {
                    panic!("Failed to read directory entry in {}: {}", dir.display(), e)
                })
                .path();
            if path.is_dir() {
                scan_dir_recursive(&path, re, coverage);
                continue;
            }
            if !path.is_file() {
                continue;
            }

            // Compute the display path once; it is identical for every match.
            let fixture_path = path.to_string_lossy();
            let mut record = |rule_id: &str| {
                coverage
                    .entry(rule_id.to_string())
                    .or_default()
                    .push(fixture_path.to_string());
            };

            if let Ok(content) = fs::read_to_string(&path) {
                for found in re.find_iter(&content) {
                    record(found.as_str());
                }
            }

            // File names are matched case-insensitively by upper-casing them.
            if let Some(name) = path.file_name() {
                let upper_name = name.to_string_lossy().to_uppercase();
                for found in re.find_iter(&upper_name) {
                    record(found.as_str());
                }
            }
        }
    }

    let fixtures_dir = workspace_root().join("tests/fixtures");
    let re = Regex::new(r"[A-Z]+-(?:[A-Z]+-)?[0-9]+").unwrap();
    let mut coverage: HashMap<String, Vec<String>> = HashMap::new();
    scan_dir_recursive(&fixtures_dir, &re, &mut coverage);
    coverage
}
/// Infers fixture coverage for each rule from its category.
///
/// Each known category maps to a list of directories relative to
/// `tests/fixtures`. For every rule whose category is known, each mapped
/// directory that exists on disk is recorded against the rule ID. Rules with
/// an unmapped category, or with no existing directory, get no entry.
fn infer_fixture_coverage(rules: &[RuleEntry]) -> HashMap<String, Vec<String>> {
    const CATEGORY_FIXTURE_DIRS: &[(&str, &[&str])] = &[
        ("agent-skills", &["skills", "invalid/skills", "valid/skills"]),
        ("claude-skills", &["skills", "invalid/skills", "valid/skills"]),
        ("claude-hooks", &["valid/hooks", "invalid/hooks"]),
        ("claude-agents", &["valid/agents", "invalid/agents"]),
        ("claude-memory", &["valid/memory", "invalid/memory"]),
        (
            "claude-settings",
            &["valid/claude-settings", "invalid/claude-settings"],
        ),
        (
            "claude-output-styles",
            &["valid/output-styles", "invalid/output-styles"],
        ),
        ("claude-plugins", &["valid/plugins", "invalid/plugins"]),
        ("agents-md", &["agents_md"]),
        ("mcp", &["mcp"]),
        ("copilot", &["copilot", "copilot-invalid", "copilot-too-long"]),
        ("cursor", &["cursor", "cursor-invalid", "cursor-legacy"]),
        ("cline", &["cline", "cline-invalid"]),
        ("xml", &["xml"]),
        ("references", &["refs"]),
        ("prompt-engineering", &["prompt", "invalid/pe", "valid/pe"]),
        ("cross-platform", &["cross_platform", "per_client_skills"]),
        ("opencode", &["opencode", "opencode-invalid"]),
        ("cursor-skills", &["per_client_skills"]),
        ("cline-skills", &["per_client_skills"]),
        ("copilot-skills", &["per_client_skills"]),
        ("codex-skills", &["per_client_skills"]),
        ("opencode-skills", &["per_client_skills"]),
        ("windsurf-skills", &["per_client_skills"]),
        ("kiro-skills", &["per_client_skills"]),
        ("kiro-agents", &["kiro-agents"]),
        ("kiro-hooks", &["kiro-hooks", "kiro-agents"]),
        ("kiro-mcp", &["kiro-mcp", "kiro-powers"]),
        ("kiro-powers", &["kiro-powers"]),
        (
            "kiro-settings",
            &["valid/kiro-settings", "invalid/kiro-settings"],
        ),
        ("amp-skills", &["per_client_skills"]),
        ("amp-checks", &["amp-checks"]),
        ("roo-code-skills", &["per_client_skills"]),
        ("gemini-cli", &["gemini_md", "gemini_md-invalid"]),
        (
            "gemini-agents",
            &["valid/gemini-agents", "invalid/gemini-agents"],
        ),
        ("codex", &["codex", "codex-invalid"]),
        ("roo-code", &["roo-code"]),
        ("windsurf", &["windsurf", "windsurf-legacy"]),
        ("kiro-steering", &["kiro-steering"]),
    ];

    let fixtures_dir = workspace_root().join("tests/fixtures");
    let mut coverage: HashMap<String, Vec<String>> = HashMap::new();

    for rule in rules {
        let Some((_, dirs)) = CATEGORY_FIXTURE_DIRS
            .iter()
            .find(|(category, _)| *category == rule.category.as_str())
        else {
            continue;
        };

        let existing = dirs
            .iter()
            .map(|dir| fixtures_dir.join(dir))
            .filter(|candidate| candidate.exists());

        // An entry is created only once a directory is known to exist.
        for candidate in existing {
            coverage
                .entry(rule.id.clone())
                .or_default()
                .push(candidate.to_string_lossy().to_string());
        }
    }

    coverage
}
/// Checks that the SARIF rule ID set and the `rules.json` ID set are equal.
///
/// NOTE(review): `extract_sarif_rule_ids` also reads `rules.json`, so both
/// sides currently come from the same file — confirm whether that is intended.
#[test]
fn test_all_rules_registered_in_sarif() {
    let rules_index = load_rules_json();
    let sarif_rules = extract_sarif_rule_ids();
    let documented_rules: BTreeSet<String> = rules_index
        .rules
        .iter()
        .map(|entry| entry.id.clone())
        .collect();

    let missing_from_sarif: Vec<&String> = documented_rules.difference(&sarif_rules).collect();
    let extra_in_sarif: Vec<&String> = sarif_rules.difference(&documented_rules).collect();

    // Renders one bullet line per rule ID.
    let bullets = |ids: &[&String]| -> String {
        ids.iter().map(|id| format!(" - {}\n", id)).collect()
    };

    let mut report = String::new();
    if !missing_from_sarif.is_empty() {
        report += &format!(
            "\nMissing from SARIF ({} rules):\n",
            missing_from_sarif.len()
        );
        report += &bullets(&missing_from_sarif);
    }
    if !extra_in_sarif.is_empty() {
        report += &format!(
            "\nExtra in SARIF (not in rules.json) ({} rules):\n",
            extra_in_sarif.len()
        );
        report += &bullets(&extra_in_sarif);
    }

    let in_parity = missing_from_sarif.is_empty() && extra_in_sarif.is_empty();
    assert!(
        in_parity,
        "SARIF rule parity check failed:\n{}\nSARIF has {} rules, rules.json has {} rules",
        report,
        sarif_rules.len(),
        documented_rules.len()
    );
}
/// Checks that every rule ID in `rules.json` is found by
/// `extract_implemented_rule_ids` in the core crate sources.
///
/// When rules are missing, a report with likely causes is written to stderr
/// before the assertion fails with the list of missing IDs.
#[test]
fn test_all_rules_implemented() {
    let rules_index = load_rules_json();
    let implemented_rules = extract_implemented_rule_ids();
    let documented_rules: BTreeSet<String> =
        rules_index.rules.iter().map(|r| r.id.clone()).collect();
    let not_implemented: Vec<&String> = documented_rules.difference(&implemented_rules).collect();

    if !not_implemented.is_empty() {
        let mut report = format!(
            "Rules documented but not found in implementation ({}):\n",
            not_implemented.len()
        );
        // Borrow the list: it is used again in the assertion below.
        for rule in &not_implemented {
            report.push_str(&format!(" - {}\n", rule));
        }
        report.push_str("\nNote: This may indicate:\n");
        report.push_str(" 1. Rule not yet implemented\n");
        report.push_str(" 2. Rule ID string not found in source (check spelling)\n");
        eprintln!("{}", report);
    }

    assert!(
        not_implemented.is_empty(),
        "{} rules are documented in rules.json but not implemented:\n{}",
        not_implemented.len(),
        not_implemented
            .iter()
            .map(|r| format!(" - {}", r))
            .collect::<Vec<_>>()
            .join("\n")
    );
}
/// Checks that every documented rule has fixture coverage, either explicit
/// (its ID appears in a fixture file or file name) or inferred from its category.
#[test]
fn test_fixture_coverage_exists() {
    let rules_index = load_rules_json();

    // Start from explicit mentions, then fold in category-based inference.
    let mut all_coverage = scan_fixtures_for_coverage();
    for (rule_id, fixture_dirs) in infer_fixture_coverage(&rules_index.rules) {
        all_coverage.entry(rule_id).or_default().extend(fixture_dirs);
    }

    // Going through a BTreeSet keeps the report sorted and free of duplicates.
    let not_covered: Vec<&str> = rules_index
        .rules
        .iter()
        .map(|entry| entry.id.as_str())
        .filter(|id| !all_coverage.contains_key(*id))
        .collect::<BTreeSet<_>>()
        .into_iter()
        .collect();

    let listing = not_covered
        .iter()
        .map(|id| format!(" - {}", id))
        .collect::<Vec<_>>()
        .join("\n");
    assert!(
        not_covered.is_empty(),
        "{} rules are documented but have no test fixture coverage:\n{}\nAdd test fixtures for uncovered rules.",
        not_covered.len(),
        listing
    );
}
/// Structural sanity checks for `rules.json`.
///
/// Checks, in order: `total_rules` equals the number of entries, the number
/// of entries equals `agnix_rules::rule_count()`, rule IDs are unique, every
/// severity is known, and every category is known.
#[test]
fn test_rules_json_integrity() {
    const VALID_SEVERITIES: [&str; 3] = ["HIGH", "MEDIUM", "LOW"];
    const VALID_CATEGORIES: &[&str] = &[
        "agent-skills",
        "claude-skills",
        "claude-hooks",
        "claude-agents",
        "claude-memory",
        "claude-output-styles",
        "claude-settings",
        "gemini-agents",
        "agents-md",
        "claude-plugins",
        "mcp",
        "copilot",
        "cursor",
        "cline",
        "gemini-cli",
        "codex",
        "windsurf",
        "xml",
        "references",
        "prompt-engineering",
        "cross-platform",
        "opencode",
        "version-awareness",
        "cursor-skills",
        "cline-skills",
        "copilot-skills",
        "codex-skills",
        "opencode-skills",
        "windsurf-skills",
        "kiro-skills",
        "kiro-agents",
        "kiro-hooks",
        "kiro-mcp",
        "kiro-powers",
        "kiro-settings",
        "kiro-steering",
        "amp-skills",
        "amp-checks",
        "roo-code-skills",
        "roo-code",
    ];

    let rules_index = load_rules_json();
    let rules = &rules_index.rules;
    let declared_total = rules_index.total_rules;

    assert_eq!(
        rules.len(),
        declared_total,
        "The 'total_rules' field ({}) in rules.json does not match the actual number of rules ({})",
        declared_total,
        rules.len()
    );
    assert_eq!(
        rules.len(),
        agnix_rules::rule_count(),
        "Expected {} rules in rules.json, found {}",
        agnix_rules::rule_count(),
        rules.len(),
    );

    // Uniqueness: `insert` reports false on the second sighting of an ID.
    let mut seen_ids: BTreeSet<String> = BTreeSet::new();
    for entry in rules {
        let first_time = seen_ids.insert(entry.id.clone());
        assert!(first_time, "Duplicate rule ID found: {}", entry.id);
    }

    for entry in rules {
        let severity = entry.severity.as_str();
        assert!(
            VALID_SEVERITIES.contains(&severity),
            "Invalid severity '{}' for rule {}",
            entry.severity,
            entry.id
        );
    }

    for entry in rules {
        let category = entry.category.as_str();
        assert!(
            VALID_CATEGORIES.contains(&category),
            "Invalid category '{}' for rule {}",
            entry.category,
            entry.id
        );
    }
}
/// Checks that every rule in `rules.json` is referenced in
/// `knowledge-base/VALIDATION-RULES.md`.
///
/// A rule counts as present when the document contains its lower-cased anchor
/// tag, or a level-3 heading starting with the ID followed by a space or `[`.
#[test]
fn test_rules_json_matches_validation_rules_md() {
    let rules_index = load_rules_json();
    let validation_rules_path = workspace_root().join("knowledge-base/VALIDATION-RULES.md");
    let content = match fs::read_to_string(&validation_rules_path) {
        Ok(text) => text,
        Err(e) => panic!("Failed to read {}: {}", validation_rules_path.display(), e),
    };

    let is_documented = |id: &str| -> bool {
        let anchor = format!("<a id=\"{}\"></a>", id.to_lowercase());
        let heading_spaced = format!("### {} ", id);
        let heading_bracketed = format!("### {}[", id);
        content.contains(&anchor)
            || content.contains(&heading_spaced)
            || content.contains(&heading_bracketed)
    };

    let missing_in_md: Vec<String> = rules_index
        .rules
        .iter()
        .map(|entry| &entry.id)
        .filter(|id| !is_documented(id.as_str()))
        .cloned()
        .collect();

    let listing = missing_in_md
        .iter()
        .map(|id| format!(" - {}", id))
        .collect::<Vec<_>>()
        .join("\n");
    assert!(
        missing_in_md.is_empty(),
        "Rules in rules.json but not found in VALIDATION-RULES.md:\n{}",
        listing
    );
}
/// Checks that the number of SARIF rule IDs equals `total_rules` in `rules.json`.
#[test]
fn test_sarif_rule_count() {
    let sarif_rule_total = extract_sarif_rule_ids().len();
    let declared_total = load_rules_json().total_rules;
    assert_eq!(
        sarif_rule_total, declared_total,
        "SARIF should have {} rules, found {}. Missing or extra rules detected.",
        declared_total, sarif_rule_total,
    );
}
/// Checks that each rule lists at least one source URL and has a
/// `verified_on` value shaped like `YYYY-MM-DD`.
#[test]
fn test_all_rules_have_evidence_metadata() {
    let date_re = Regex::new(r"^\d{4}-\d{2}-\d{2}$").unwrap();
    let rules_index = load_rules_json();

    for RuleEntry { id, evidence, .. } in &rules_index.rules {
        let has_sources = !evidence.source_urls.is_empty();
        assert!(
            has_sources,
            "Rule {} has no source URLs in evidence metadata",
            id
        );

        let date_ok = date_re.is_match(&evidence.verified_on);
        assert!(
            date_ok,
            "Rule {} has invalid verified_on date format: '{}'. Expected YYYY-MM-DD",
            id, evidence.verified_on
        );
    }
}
/// Checks that every evidence source URL is an `http`/`https` URL without
/// whitespace, and has no leading or trailing whitespace.
#[test]
fn test_evidence_source_urls_valid() {
    let rules_index = load_rules_json();
    let url_re = Regex::new(r"^https?://[^\s]+$").unwrap();

    // Flatten to (rule id, url) pairs so each URL is checked in file order.
    let rule_urls = rules_index.rules.iter().flat_map(|entry| {
        entry
            .evidence
            .source_urls
            .iter()
            .map(move |url| (&entry.id, url))
    });

    for (rule_id, url) in rule_urls {
        assert!(
            url_re.is_match(url),
            "Rule {} has invalid source URL: '{}'",
            rule_id,
            url
        );
        assert!(
            url.trim() == url,
            "Rule {} source URL has whitespace: '{}'",
            rule_id,
            url
        );
    }
}
/// Reports (without failing) rules that are `HIGH` severity but not `MUST`
/// level while being sourced from a spec or vendor docs.
///
/// The findings are only printed to stderr; this test has no assertion.
#[test]
fn test_normative_level_consistency() {
    let rules_index = load_rules_json();

    let inconsistencies: Vec<String> = rules_index
        .rules
        .iter()
        .filter(|entry| entry.severity == "HIGH")
        .filter(|entry| entry.evidence.normative_level != NormativeLevel::Must)
        .filter(|entry| {
            matches!(
                entry.evidence.source_type,
                SourceType::Spec | SourceType::VendorDocs
            )
        })
        .map(|entry| {
            format!(
                "{}: HIGH severity but {:?} normative level (source: {:?})",
                entry.id, entry.evidence.normative_level, entry.evidence.source_type
            )
        })
        .collect();

    if inconsistencies.is_empty() {
        return;
    }

    eprintln!(
        "\nNote: {} rules have HIGH severity but non-MUST normative level:",
        inconsistencies.len()
    );
    inconsistencies
        .iter()
        .for_each(|msg| eprintln!(" - {}", msg));
}
/// Prints how many rules use each evidence source type and requires at least
/// three distinct source types to be in use.
#[test]
fn test_evidence_source_type_distribution() {
    let rules_index = load_rules_json();

    // Tally by the Debug name of the source type.
    let by_source = rules_index.rules.iter().fold(
        HashMap::<String, usize>::new(),
        |mut tally, entry| {
            *tally
                .entry(format!("{:?}", entry.evidence.source_type))
                .or_insert(0) += 1;
            tally
        },
    );

    eprintln!("\nEvidence source type distribution:");
    let mut ranked: Vec<(&String, &usize)> = by_source.iter().collect();
    // Largest count first.
    ranked.sort_by_key(|&(_, count)| std::cmp::Reverse(*count));
    for (source_type, count) in ranked {
        eprintln!(" {}: {}", source_type, count);
    }

    let distinct = by_source.len();
    assert!(
        distinct >= 3,
        "Expected at least 3 different source types, found {}",
        distinct
    );
}
/// Checks that a rule claiming `tests.fixtures = true` has fixture coverage,
/// either explicit or inferred from its category.
///
/// Rules that claim no fixtures are not checked.
#[test]
fn test_evidence_test_coverage_accuracy() {
    let rules_index = load_rules_json();

    let mut all_coverage = scan_fixtures_for_coverage();
    for (rule_id, fixture_dirs) in infer_fixture_coverage(&rules_index.rules) {
        all_coverage.entry(rule_id).or_default().extend(fixture_dirs);
    }

    let mismatches: Vec<String> = rules_index
        .rules
        .iter()
        .filter(|entry| entry.evidence.tests.fixtures)
        .filter(|entry| !all_coverage.contains_key(&entry.id))
        .map(|entry| format!("{}: claims fixtures=true but none found", entry.id))
        .collect();

    assert!(
        mismatches.is_empty(),
        "Evidence test coverage mismatches:\n{}",
        mismatches.join("\n")
    );
}
/// Checks that any `applies_to.tool` value set on a rule is one of
/// `agnix_rules::valid_tools()`. Rules without a tool are skipped.
#[test]
fn test_applies_to_tool_values() {
    let rules_index = load_rules_json();
    let valid_tools = agnix_rules::valid_tools();

    let rules_with_tool = rules_index.rules.iter().filter_map(|entry| {
        entry
            .evidence
            .applies_to
            .tool
            .as_deref()
            .map(|tool| (&entry.id, tool))
    });

    for (rule_id, tool) in rules_with_tool {
        assert!(
            valid_tools.contains(&tool),
            "Rule {} has unknown tool '{}'. Valid tools: {:?}",
            rule_id,
            tool,
            valid_tools
        );
    }
}
/// Checks that every tool named in `agnix_rules::TOOL_RULE_PREFIXES` is also
/// present in `agnix_rules::valid_tools()`.
#[test]
fn test_tool_rule_prefixes_consistency() {
let valid_tools = agnix_rules::valid_tools();
// Each entry pairs a rule-ID prefix with the tool it belongs to.
for (prefix, tool) in agnix_rules::TOOL_RULE_PREFIXES {
assert!(
valid_tools.contains(tool),
"Tool '{}' from prefix '{}' is not in VALID_TOOLS. \
TOOL_RULE_PREFIXES and VALID_TOOLS must be consistent.",
tool,
prefix
);
}
}
/// Checks that `fix_safety` agrees with `autofix` for every rule.
///
/// With `autofix = true`, `fix_safety` must be `safe`, `unsafe` or
/// `safe/unsafe`. With `autofix = false`, `fix_safety` must be absent.
#[test]
fn test_all_rules_have_fix_metadata() {
    const VALID_FIX_SAFETY: [&str; 3] = ["safe", "unsafe", "safe/unsafe"];
    let valid_fix_safety = VALID_FIX_SAFETY;

    let rules_index = load_rules_json();
    for entry in &rules_index.rules {
        match (entry.fix.autofix, entry.fix.fix_safety.as_deref()) {
            (true, declared) => {
                // A missing value is reported as the empty string.
                let safety = declared.unwrap_or("");
                assert!(
                    valid_fix_safety.contains(&safety),
                    "Rule {} has autofix=true but invalid fix_safety: '{}'. Expected one of: {:?}",
                    entry.id,
                    safety,
                    valid_fix_safety
                );
            }
            (false, declared) => {
                assert!(
                    declared.is_none(),
                    "Rule {} has autofix=false but fix_safety is set to '{}'",
                    entry.id,
                    declared.unwrap_or("")
                );
            }
        }
    }
}
/// Checks that the number of `autofix = true` rules in `rules.json` equals the
/// count stated after `Auto-Fixable**:` in `knowledge-base/VALIDATION-RULES.md`.
///
/// Panics when the document cannot be read or the count line is not found.
#[test]
fn test_autofix_count_matches_documentation() {
    let rules_index = load_rules_json();
    let autofix_count = rules_index
        .rules
        .iter()
        .filter(|entry| entry.fix.autofix)
        .count();

    let validation_rules_path = workspace_root().join("knowledge-base/VALIDATION-RULES.md");
    let content = match fs::read_to_string(&validation_rules_path) {
        Ok(text) => text,
        Err(e) => panic!("Failed to read {}: {}", validation_rules_path.display(), e),
    };

    let re = Regex::new(r"Auto-Fixable\*\*:\s*(\d+)\s*rules").unwrap();
    let Some(cap) = re.captures(&content) else {
        panic!(
            "Could not find documented auto-fixable rule count in {} using pattern {:?}. \
             The footer format may have changed.",
            validation_rules_path.display(),
            re.as_str()
        )
    };
    let documented_count: usize = cap[1].parse().unwrap();

    assert_eq!(
        autofix_count, documented_count,
        "rules.json has {} auto-fixable rules but VALIDATION-RULES.md documents {}",
        autofix_count, documented_count
    );
}
/// Checks that configuring the tool as `copilot` in any of three casings
/// leaves rule `COP-001` enabled.
#[test]
fn test_is_tool_alias_case_sensitivity() {
    use agnix_core::LintConfig;

    for alias in ["Copilot", "COPILOT", "copilot"] {
        let mut config = LintConfig::default();
        config.set_tools(vec![alias.to_string()]);

        let cop_rule_enabled = config.is_rule_enabled("COP-001");
        assert!(
            cop_rule_enabled,
            "Alias '{}' should match 'github-copilot' and enable COP-* rules",
            alias
        );
    }
}