koala-core 1.0.4

Shared types, invariant evaluator, and primitives for the koala framework.
Documentation
use crate::invariant::rules::util::{rel, walk_repo};
use crate::invariant::{Category, Context, Invariant, Outcome};
use regex::Regex;
use std::fs;
use std::sync::OnceLock;

/// Hosts that `host_allowed` accepts verbatim, so URL literals naming them are
/// never reported: loopback / unspecified addresses and the `example.*`
/// documentation domains.
const HOST_ALLOWLIST: &[&str] = &[
    "localhost",
    "127.0.0.1",
    "0.0.0.0",
    // NOTE(review): the `host` capture in `url_pattern` stops at the first
    // `:`, so this entry looks unreachable as written — confirm.
    "::1",
    "example.com",
    "example.org",
    "example.net",
];

/// Inline marker that exempts a line from this rule. `evaluate` only honours
/// it when the text following the marker is `reason=` plus a non-empty value;
/// otherwise the line is reported as carrying a malformed directive.
const ALLOW_DIRECTIVE: &str = "koala:allow=security.no-hardcoded-url";

/// Returns the shared regex used to spot URL literals.
///
/// The pattern is compiled on first use and cached for later calls. It exposes
/// two named groups: `scheme` (`http`, `https`, `s3`, `ws`, `wss`, `grpc`,
/// `nats` or `kafka`) and `host` (the text after `://` up to the first `/`,
/// `:`, whitespace, quote, `?` or `#`).
fn url_pattern() -> &'static Regex {
    const URL_SOURCE: &str =
        r#"(?P<scheme>https?|s3|wss?|grpc|nats|kafka)://(?P<host>[^/:\s"'?#]+)"#;
    static COMPILED: OnceLock<Regex> = OnceLock::new();

    COMPILED.get_or_init(|| Regex::new(URL_SOURCE).expect("static URL pattern compiles"))
}

/// Reports whether a `/`-separated relative path has a component named
/// exactly `tests` or `benches`.
///
/// Only whole components count: `src/tests.rs` or `my_tests/` do not match.
fn is_test_path(rel_path: &str) -> bool {
    for component in rel_path.split('/') {
        if matches!(component, "tests" | "benches") {
            return true;
        }
    }
    false
}

/// Decides whether `host` is exempt from the hard-coded URL rule.
///
/// A host is exempt when it equals an entry of `HOST_ALLOWLIST` or when it is
/// a subdomain of `example.com`, `example.org` or `example.net`. The
/// comparison is exact and case-sensitive.
fn host_allowed(host: &str) -> bool {
    const EXAMPLE_SUFFIXES: [&str; 3] = [".example.com", ".example.org", ".example.net"];

    HOST_ALLOWLIST.iter().any(|&allowed| allowed == host)
        || EXAMPLE_SUFFIXES
            .iter()
            .any(|suffix| host.ends_with(suffix))
}

/// Marks, per line of `content`, whether the line belongs to an item gated by
/// `#[cfg(test)]`.
///
/// The result has one entry per line of `content.lines()`. For every
/// `#[cfg(test)]` attribute found outside a whole-line `//` comment, the
/// attribute line and all lines up to the end of the attributed item are set
/// to `true`. The end of the item is the first of:
///
/// * the `}` that closes the first `{` opened after the attribute (the block
///   may open and close on a single line);
/// * a `;` seen before any `{` and outside `(..)` / `[..]` — a brace-less
///   item such as `use ...;`, `const ...;` or `mod tests;`;
/// * a `}` seen before any `{` — the enclosing scope closed, e.g. the
///   attribute sat on the last field of a struct; that line is not marked.
///
/// Stopping at these points is what keeps a brace-less or one-line test item
/// from marking the *next* production block as test code.
///
/// If a block is opened but never closed, everything through the end of the
/// input is marked; if the item never opens or terminates, nothing is marked.
///
/// Limitations: this is a textual scan, not a parser. Braces, semicolons or
/// the attribute text inside string literals or trailing comments can still
/// mis-count, and a brace-less item terminated by `,` (such as a `match` arm)
/// is not recognised as ending.
fn lines_in_cfg_test(content: &str) -> Vec<bool> {
    const ATTR: &str = "#[cfg(test)]";

    /// True for lines that consist solely of a `//`-style comment.
    fn is_comment_line(line: &str) -> bool {
        line.trim_start().starts_with("//")
    }

    let lines: Vec<&str> = content.lines().collect();
    let mut in_test = vec![false; lines.len()];
    let mut i = 0;
    while i < lines.len() {
        // A comment that merely mentions the attribute must not trigger.
        let attr_at = if is_comment_line(lines[i]) {
            None
        } else {
            lines[i].find(ATTR)
        };
        let Some(attr_at) = attr_at else {
            i += 1;
            continue;
        };

        let mut opened = false; // has the item's `{` been seen yet?
        let mut brace_depth = 0usize; // `{` nesting once opened
        let mut nest = 0usize; // `(`/`[` nesting before the block opens
        let mut last: Option<usize> = None; // final line of the gated item

        'scan: for (j, line) in lines.iter().copied().enumerate().skip(i) {
            let text = if j == i {
                // Only what follows the attribute belongs to the item.
                &line[attr_at + ATTR.len()..]
            } else if is_comment_line(line) {
                continue;
            } else {
                line
            };
            for c in text.chars() {
                match c {
                    '{' => {
                        opened = true;
                        brace_depth += 1;
                    }
                    '}' if opened => {
                        brace_depth -= 1;
                        if brace_depth == 0 {
                            last = Some(j);
                            break 'scan;
                        }
                    }
                    // Enclosing scope closed before the item opened a block:
                    // the item ended on the previous line.
                    '}' => {
                        last = Some(j.saturating_sub(1).max(i));
                        break 'scan;
                    }
                    '(' | '[' if !opened => nest += 1,
                    ')' | ']' if !opened => nest = nest.saturating_sub(1),
                    // Top-level `;` before any block: brace-less item. A `;`
                    // inside `[u8; 4]` or similar is skipped via `nest`.
                    ';' if !opened && nest == 0 => {
                        last = Some(j);
                        break 'scan;
                    }
                    _ => {}
                }
            }
        }

        let last = match last {
            Some(end) => end,
            // Unterminated block: treat the rest of the input as test code.
            None if opened => lines.len() - 1,
            // Attribute followed by nothing recognisable: mark nothing.
            None => break,
        };
        in_test[i..=last].fill(true);
        i = last + 1;
    }
    in_test
}

/// Invariant `security.no-hardcoded-url`: reports URL literals with a
/// non-allow-listed host found in the repository's `.rs` files.
pub struct NoHardcodedUrl;

impl Invariant for NoHardcodedUrl {
    /// Stable identifier of this rule; also the suffix of `ALLOW_DIRECTIVE`.
    fn id(&self) -> &'static str {
        "security.no-hardcoded-url"
    }

    /// This rule is reported under the security category.
    fn category(&self) -> Category {
        Category::Security
    }

    /// One-line statement of what the rule enforces.
    fn intent(&self) -> &'static str {
        "Source code must not embed external URL literals; route via config or env. See ADR-0016."
    }

    /// The architecture decision record that motivates this rule.
    fn adr(&self) -> Option<&'static str> {
        Some("ADR-0016")
    }

    /// Scans every `.rs` file yielded by `walk_repo` and fails if any line
    /// outside test code contains a URL literal whose host is not allow-listed.
    ///
    /// Skipped entirely: files under a `tests`/`benches` path component, the
    /// file at `self_path`, files that cannot be read as UTF-8 text, and lines
    /// that `lines_in_cfg_test` marks as `#[cfg(test)]` code.
    ///
    /// Every URL on a line is inspected, not only the first, so a line such as
    /// `("http://localhost", "https://prod.internal")` is still reported. A
    /// `//` found before the first URL, or between two URLs, is taken as the
    /// start of a comment and ends the scan of that line.
    ///
    /// A line carrying `ALLOW_DIRECTIVE` followed by a non-empty `reason=` is
    /// exempt; a directive without a reason is reported once for the line.
    fn evaluate(&self, ctx: &Context) -> Outcome {
        let re = url_pattern();
        // NOTE(review): presumably this rule's own source file, excluded
        // because it has to spell out URL schemes — confirm the path.
        let self_path = "crates/koala-core/src/invariant/rules/no_hardcoded_url.rs";
        let mut hits = Vec::new();

        for entry in walk_repo(ctx.root()).filter(|e| e.file_type().is_file()) {
            let path = entry.path();
            if path.extension().and_then(|s| s.to_str()) != Some("rs") {
                continue;
            }
            let r = rel(path, ctx.root());
            if r == self_path || is_test_path(&r) {
                continue;
            }
            // Best effort: unreadable or non-UTF-8 files are skipped.
            let Ok(content) = fs::read_to_string(path) else {
                continue;
            };
            let test_lines = lines_in_cfg_test(&content);

            for (i, line) in content.lines().enumerate() {
                if test_lines.get(i).copied().unwrap_or(false) {
                    continue;
                }

                // End of the previous URL match; the text between it and the
                // next match is where a real `//` comment marker could sit.
                // The `//` inside `scheme://` is part of a match, never a gap.
                let mut scanned_to = 0;
                for caps in re.captures_iter(line) {
                    let m = caps.get(0).expect("group 0 is the whole match");
                    if line[scanned_to..m.start()].contains("//") {
                        // Everything from here on is inside a comment.
                        break;
                    }
                    scanned_to = m.end();

                    let host = caps
                        .name("host")
                        .expect("`host` group participates in every match")
                        .as_str();
                    if host_allowed(host) {
                        continue;
                    }

                    // The directive applies to the whole line, so decide once
                    // and stop scanning whichever way it goes.
                    if let Some(after) = line.split(ALLOW_DIRECTIVE).nth(1) {
                        let trimmed = after.trim_start_matches([' ', '\t']);
                        let reason = trimmed.strip_prefix("reason=").map(str::trim);
                        if !reason.is_some_and(|s| !s.is_empty()) {
                            hits.push(format!(
                                "{}:{} (allow directive missing non-empty `reason=`)",
                                r,
                                i + 1
                            ));
                        }
                        break;
                    }

                    hits.push(format!(
                        "{}:{} {}://{}",
                        r,
                        i + 1,
                        caps.name("scheme")
                            .expect("`scheme` group participates in every match")
                            .as_str(),
                        host
                    ));
                }
            }
        }

        if hits.is_empty() {
            Outcome::pass()
        } else {
            Outcome::fail_repro(
                format!(
                    "Hardcoded URL literals (move to config or env, or annotate with `// {ALLOW_DIRECTIVE} reason=...`):\n  {}",
                    hits.join("\n  ")
                ),
                "rg -n '(https?|s3|wss?|grpc|nats|kafka)://' crates",
            )
        }
    }
}