use crate::invariant::rules::util::{rel, walk_repo};
use crate::invariant::{Category, Context, Invariant, Outcome};
use regex::Regex;
use std::fs;
use std::sync::OnceLock;
/// Hosts that `host_allowed` accepts by exact, case-sensitive string match.
///
/// Subdomains of the three `example.*` domains are handled separately by the
/// suffix check inside `host_allowed`.
///
/// NOTE(review): the `host` group in `url_pattern` excludes `:`, so a captured
/// host can never equal `::1` — confirm whether that entry is reachable.
const HOST_ALLOWLIST: &[&str] = &[
"localhost",
"127.0.0.1",
"0.0.0.0",
"::1",
"example.com",
"example.org",
"example.net",
];
/// Marker text that `NoHardcodedUrl::evaluate` looks for on a line containing
/// a disallowed URL. The line is exempted only when the marker is followed
/// (after optional spaces/tabs) by `reason=` and a non-empty value.
const ALLOW_DIRECTIVE: &str = "koala:allow=security.no-hardcoded-url";
/// Returns the process-wide regex used to spot URL literals.
///
/// The regex is compiled on first use and cached for every later call. It
/// exposes two named groups: `scheme` (one of `http`, `https`, `s3`, `ws`,
/// `wss`, `grpc`, `nats`, `kafka`) and `host` (the run of characters after
/// `://` up to the first `/`, `:`, whitespace, quote, `?` or `#`).
///
/// # Panics
///
/// Panics if the hard-coded pattern fails to compile, which would be a bug in
/// this file.
fn url_pattern() -> &'static Regex {
    const SOURCE: &str =
        r#"(?P<scheme>https?|s3|wss?|grpc|nats|kafka)://(?P<host>[^/:\s"'?#]+)"#;
    static COMPILED: OnceLock<Regex> = OnceLock::new();

    COMPILED.get_or_init(|| Regex::new(SOURCE).expect("static URL pattern compiles"))
}
/// Reports whether a `/`-separated relative path has a component named
/// exactly `tests` or `benches`.
///
/// Only whole components count: `src/tests.rs` and `src/mytests/a.rs` are not
/// test paths.
fn is_test_path(rel_path: &str) -> bool {
    for component in rel_path.split('/') {
        if matches!(component, "tests" | "benches") {
            return true;
        }
    }
    false
}
/// Reports whether `host` is exempt from the hardcoded-URL rule.
///
/// A host is exempt when it equals an entry of `HOST_ALLOWLIST` exactly, or
/// when it is a subdomain of `example.com`, `example.org` or `example.net`
/// (i.e. ends with `.example.<tld>`). Matching is case-sensitive.
fn host_allowed(host: &str) -> bool {
    const RESERVED_SUFFIXES: [&str; 3] = [".example.com", ".example.org", ".example.net"];

    let listed = HOST_ALLOWLIST.iter().any(|&allowed| allowed == host);
    listed || RESERVED_SUFFIXES.iter().any(|&suffix| host.ends_with(suffix))
}
/// Marks which lines of `content` belong to the body of a `#[cfg(test)]` item.
///
/// Returns one flag per line of `content.lines()`; `true` means the line lies
/// inside the braces of an item introduced by a `#[cfg(test)]` attribute
/// (the lines holding the opening and closing brace included).
///
/// This is a textual heuristic, not a parser: braces are counted per line,
/// including any that appear inside string literals or comments.
///
/// Two shapes are handled explicitly so the scan never runs on into
/// unrelated code that follows the gated item:
///
/// * braceless items such as `#[cfg(test)] mod tests;` or
///   `#[cfg(test)] use foo;` — the scan stops at the terminating `;`;
/// * items whose braces balance on a single line, e.g. `fn helper() {}` —
///   that single line is marked and the scan stops.
fn lines_in_cfg_test(content: &str) -> Vec<bool> {
    let lines: Vec<&str> = content.lines().collect();
    let mut in_test = vec![false; lines.len()];
    let mut i = 0;
    while i < lines.len() {
        if !lines[i].contains("#[cfg(test)]") {
            i += 1;
            continue;
        }

        let mut depth: i32 = 0;
        let mut started = false;
        let mut j = i;
        while j < lines.len() {
            let line = lines[j];
            let (opens, closes) = line.chars().fold((0i32, 0i32), |(o, c), ch| match ch {
                '{' => (o + 1, c),
                '}' => (o, c + 1),
                _ => (o, c),
            });

            if !started {
                if opens == 0 {
                    // No body yet. A statement terminator here means the gated
                    // item has no braces at all, so nothing follows to mark.
                    let code = line.split("//").next().unwrap_or(line).trim_end();
                    if code.ends_with(';') {
                        break;
                    }
                    j += 1;
                    continue;
                }
                started = true;
            }

            in_test[j] = true;
            depth += opens - closes;
            if depth <= 0 {
                // Body closed (possibly on the very line it opened).
                break;
            }
            j += 1;
        }
        // Resume after the gated item; if the scan ran off the end this
        // simply terminates the outer loop.
        i = j + 1;
    }
    in_test
}
pub struct NoHardcodedUrl;
impl Invariant for NoHardcodedUrl {
fn id(&self) -> &'static str {
"security.no-hardcoded-url"
}
fn category(&self) -> Category {
Category::Security
}
fn intent(&self) -> &'static str {
"Source code must not embed external URL literals; route via config or env. See ADR-0016."
}
fn adr(&self) -> Option<&'static str> {
Some("ADR-0016")
}
fn evaluate(&self, ctx: &Context) -> Outcome {
let re = url_pattern();
let self_path = "crates/koala-core/src/invariant/rules/no_hardcoded_url.rs";
let mut hits = Vec::new();
for entry in walk_repo(ctx.root()).filter(|e| e.file_type().is_file()) {
let path = entry.path();
if path.extension().and_then(|s| s.to_str()) != Some("rs") {
continue;
}
let r = rel(path, ctx.root());
if r == self_path || is_test_path(&r) {
continue;
}
let Ok(content) = fs::read_to_string(path) else {
continue;
};
let test_lines = lines_in_cfg_test(&content);
for (i, line) in content.lines().enumerate() {
if test_lines.get(i).copied().unwrap_or(false) {
continue;
}
let Some(caps) = re.captures(line) else {
continue;
};
let m = caps.get(0).unwrap();
if line[..m.start()].contains("//") {
continue;
}
let host = caps.name("host").unwrap().as_str();
if host_allowed(host) {
continue;
}
if let Some(after) = line.split(ALLOW_DIRECTIVE).nth(1) {
let trimmed = after.trim_start_matches([' ', '\t']);
let reason = trimmed.strip_prefix("reason=").map(str::trim);
if reason.is_some_and(|s| !s.is_empty()) {
continue;
}
hits.push(format!(
"{}:{} (allow directive missing non-empty `reason=`)",
r,
i + 1
));
continue;
}
hits.push(format!(
"{}:{} {}://{}",
r,
i + 1,
caps.name("scheme").unwrap().as_str(),
host
));
}
}
if hits.is_empty() {
Outcome::pass()
} else {
Outcome::fail_repro(
format!(
"Hardcoded URL literals (move to config or env, or annotate with `// {ALLOW_DIRECTIVE} reason=...`):\n {}",
hits.join("\n ")
),
"rg -n '(https?|s3|wss?|grpc|nats|kafka)://' crates",
)
}
}
}