use std::collections::{BTreeMap, BTreeSet};
use std::env;
use std::fs;
use std::path::{Path, PathBuf};
// Upper bound (5 MiB) on the size of rules.json; the build fails fast on
// anything larger instead of slurping an accidentally-committed huge file.
const MAX_RULES_FILE_SIZE: u64 = 5 * 1024 * 1024;
/// Walks up from `start` and returns the first ancestor directory whose
/// `Cargo.toml` declares a workspace (contains a `[workspace]` or
/// `[workspace.*]` table).
///
/// Returns `None` when no ancestor has such a manifest. A `Cargo.toml` that
/// exists but cannot be read is treated the same as a missing one.
fn find_workspace_root(start: &Path) -> Option<PathBuf> {
    start
        .ancestors()
        .find(|dir| {
            // Read directly instead of `exists()` + read: one filesystem
            // touch, and no TOCTOU window between the check and the read.
            fs::read_to_string(dir.join("Cargo.toml")).is_ok_and(|content| {
                content.contains("[workspace]") || content.contains("[workspace.")
            })
        })
        .map(|p| p.to_path_buf())
}
fn main() {
let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap();
let manifest_path = Path::new(&manifest_dir);
let crate_rules = manifest_path.join("rules.json");
let workspace_rules =
find_workspace_root(manifest_path).map(|root| root.join("knowledge-base/rules.json"));
println!("cargo:rerun-if-changed={}", crate_rules.display());
let rules_path = if crate_rules.exists() {
crate_rules
} else if let Some(ws_rules) = workspace_rules {
if ws_rules.exists() {
println!("cargo:rerun-if-changed={}", ws_rules.display());
ws_rules
} else {
panic!(
"Could not find rules.json at {} or {}",
manifest_path.join("rules.json").display(),
ws_rules.display()
);
}
} else {
panic!(
"Could not find rules.json at {} (no workspace root found)",
manifest_path.join("rules.json").display()
);
};
let file_size = fs::metadata(&rules_path)
.unwrap_or_else(|e| panic!("Failed to get metadata for {}: {}", rules_path.display(), e))
.len();
if file_size > MAX_RULES_FILE_SIZE {
panic!(
"rules.json at {} is too large ({} bytes, max {} bytes)",
rules_path.display(),
file_size,
MAX_RULES_FILE_SIZE
);
}
let rules_json = fs::read_to_string(&rules_path).unwrap_or_else(|e| {
panic!(
"Failed to read rules.json at {}: {}",
rules_path.display(),
e
)
});
let rules: serde_json::Value = serde_json::from_str(&rules_json).unwrap_or_else(|e| {
panic!(
"Failed to parse rules.json at {}: {}",
rules_path.display(),
e
)
});
let rules_array = rules["rules"]
.as_array()
.expect("rules.json must have a 'rules' array");
let mut generated_code = String::new();
generated_code.push_str("// Auto-generated from rules.json by build.rs\n");
generated_code.push_str("// Do not edit manually!\n\n");
generated_code.push_str("/// Rule data as (id, name) tuples.\n");
generated_code.push_str("/// \n");
generated_code.push_str(
"/// This is the complete list of validation rules from knowledge-base/rules.json.\n",
);
generated_code.push_str("pub const RULES_DATA: &[(&str, &str)] = &[\n");
let escape_str = |s: &str| {
s.replace('\\', "\\\\")
.replace('"', "\\\"")
.replace('\n', "\\n")
.replace('\r', "\\r")
.replace('\t', "\\t")
};
let is_valid_id = |id: &str| -> bool {
!id.is_empty()
&& id.len() <= 20
&& id.chars().all(|c| c.is_ascii_alphanumeric() || c == '-')
};
let is_valid_name = |name: &str| -> bool {
!name.is_empty() && name.len() <= 200 && !name.chars().any(|c| c.is_control() && c != ' ')
};
for (idx, rule) in rules_array.iter().enumerate() {
let id = rule["id"]
.as_str()
.unwrap_or_else(|| panic!("rule[{}] must have string 'id' field", idx));
let name = rule["name"]
.as_str()
.unwrap_or_else(|| panic!("rule[{}] must have string 'name' field", idx));
if !is_valid_id(id) {
panic!(
"rule[{}] has invalid id '{}': must be 1-20 alphanumeric/hyphen characters",
idx, id
);
}
if !is_valid_name(name) {
panic!(
"rule[{}] '{}' has invalid name: must be 1-200 chars, no control characters",
idx, id
);
}
let escaped_id = escape_str(id);
let escaped_name = escape_str(name);
generated_code.push_str(&format!(
" (\"{}\", \"{}\"),\n",
escaped_id, escaped_name
));
}
generated_code.push_str("];\n\n");
let mut tools: BTreeSet<String> = BTreeSet::new();
for rule in rules_array {
if let Some(tool) = rule
.get("evidence")
.and_then(|e| e.get("applies_to"))
.and_then(|a| a.get("tool"))
.and_then(|t| t.as_str())
{
if !tool.is_empty() {
tools.insert(tool.to_string());
}
}
}
generated_code
.push_str("/// Valid tool names derived from rules.json evidence.applies_to.tool.\n");
generated_code.push_str("/// \n");
generated_code
.push_str("/// These are the tools that have at least one rule specifically for them.\n");
generated_code.push_str("pub const VALID_TOOLS: &[&str] = &[\n");
for tool in &tools {
generated_code.push_str(&format!(" \"{}\",\n", escape_str(tool)));
}
generated_code.push_str("];\n\n");
#[derive(Default)]
struct PrefixInfo {
tools: BTreeSet<String>,
has_generic: bool, }
let mut prefix_info: BTreeMap<String, PrefixInfo> = BTreeMap::new();
for rule in rules_array {
let id = rule["id"].as_str().unwrap_or("");
let tool = rule
.get("evidence")
.and_then(|e| e.get("applies_to"))
.and_then(|a| a.get("tool"))
.and_then(|t| t.as_str());
if let Some(prefix) = extract_rule_prefix(id) {
let info = prefix_info.entry(prefix).or_default();
if let Some(tool_name) = tool {
if !tool_name.is_empty() {
info.tools.insert(tool_name.to_string());
} else {
info.has_generic = true;
}
} else {
info.has_generic = true;
}
}
}
generated_code.push_str("/// Mapping of rule ID prefixes to their associated tools.\n");
generated_code.push_str("/// \n");
generated_code.push_str(
"/// Derived from rules.json: for each prefix, this is the tool that all rules\n",
);
generated_code
.push_str("/// with that prefix apply to. Only includes prefixes where ALL rules\n");
generated_code
.push_str("/// consistently specify the same tool (excludes generic prefixes).\n");
generated_code.push_str("pub const TOOL_RULE_PREFIXES: &[(&str, &str)] = &[\n");
for (prefix, info) in &prefix_info {
if info.tools.len() == 1 && !info.has_generic {
let tool = info.tools.iter().next().unwrap();
generated_code.push_str(&format!(
" (\"{}\", \"{}\"),\n",
escape_str(prefix),
escape_str(tool)
));
}
}
generated_code.push_str("];\n");
let out_dir = env::var("OUT_DIR").unwrap();
let dest_path = Path::new(&out_dir).join("rules_data.rs");
fs::write(&dest_path, generated_code).expect("Failed to write generated rules");
}
/// Splits a rule id of the form `<prefix>-<digits>` and returns `<prefix>-`
/// (trailing hyphen included), e.g. `"sec-001"` -> `Some("sec-")`.
///
/// Returns `None` when the id has no hyphen or when the part after the last
/// hyphen is empty or not purely ASCII digits.
fn extract_rule_prefix(rule_id: &str) -> Option<String> {
    let (prefix, suffix) = rule_id.rsplit_once('-')?;
    let numeric_tail = !suffix.is_empty() && suffix.bytes().all(|b| b.is_ascii_digit());
    numeric_tail.then(|| format!("{}-", prefix))
}