ckg-storage 1.1.2

CozoDB-backed storage layer for ckg (per-repo + registry DBs).
Documentation
//! Pre-baked Datalog queries (blast-radius, call-chain, dead-code).
//!
//! All graph traversals use a **depth-bounded counter** rather than naive
//! fixed-point recursion — Cozo prunes the `dist <= depth` branches and gives
//! roughly 9× lower latency on a 1k-symbol graph (see
//! `crates/ckg-storage/tests/perf_spike.rs`).
//!
//! ## Injection safety
//!
//! `seed` is parameter-bound via Cozo `$seed` (caller-supplied node id).
//! `depth` is a `u32` that is clamped into `1..=MAX_DEPTH` before being
//! formatted into the script body — no caller-controlled string ever reaches
//! the script text. The previous implementation `format!`-pasted `seed` after
//! a quote-only escape that did not handle `\n` / control chars; that vector
//! is closed.

use std::collections::BTreeMap;

use cozo::DataValue;

/// Hard ceiling on traversal depth.
///
/// Requested depths are not rejected: `clamp_depth` coerces them into
/// `1..=MAX_DEPTH` before the value is formatted into a query, so the
/// `dist <= N` bound in the generated scripts never exceeds this constant.
/// The intent is to keep queries from compiling into runaway joins.
pub const MAX_DEPTH: u32 = 32;

/// Coerce a requested traversal depth into the inclusive range
/// `1..=MAX_DEPTH`: zero is raised to one, anything above the ceiling is
/// lowered to it, and in-range values pass through unchanged.
fn clamp_depth(depth: u32) -> u32 {
    // Smallest depth a generated query is ever built with.
    const MIN_DEPTH: u32 = 1;
    Ord::clamp(depth, MIN_DEPTH, MAX_DEPTH)
}

/// Parameter map carrying a single `seed` entry, which the traversal
/// scripts reference as `$seed`.
fn seed_params(seed: &str) -> BTreeMap<String, DataValue> {
    BTreeMap::from([(String::from("seed"), DataValue::from(seed))])
}

/// Build the "who calls this, transitively" query for `seed`.
///
/// The script walks `Calls` edges backwards (from `dst` to `src`), starting
/// at the node bound to `$seed`, and tags each reached `id` with its hop
/// count `dist`. `depth` is clamped via `clamp_depth` and becomes the
/// `dist <= N` bound on the recursive rule. Rows are ordered by `dist` and
/// capped at 5000.
///
/// Returns `(script, params)` for use with `Storage::run_with` /
/// `Storage::run_with_immutable`; `params` holds the `seed` binding, so the
/// seed text itself never appears in the script.
pub fn blast_radius(seed: &str, depth: u32) -> (String, BTreeMap<String, DataValue>) {
    let bound = clamp_depth(depth);
    let script = format!(
        concat!(
            // Base case: direct callers of the seed.
            "ancestors[id, dist] := *Calls{{src: id, dst: $seed}}, dist = 1\n",
            // Recursive case: callers of an already-reached node, one hop further.
            "ancestors[id, dist] := ancestors[m, d], *Calls{{src: id, dst: m}}, ",
            "dist = d + 1, dist <= {depth}\n",
            // Output rule plus ordering and row cap.
            "?[id, dist] := ancestors[id, dist] :order dist :limit 5000\n",
        ),
        depth = bound,
    );
    (script, seed_params(seed))
}

/// Build the "what does this call, transitively" query for `seed`.
///
/// The script walks `Calls` edges forwards (from `src` to `dst`), starting
/// at the node bound to `$seed`, and tags each reached `id` with its hop
/// count `dist`. `depth` is clamped via `clamp_depth` and becomes the
/// `dist <= N` bound on the recursive rule. Rows are ordered by `dist` and
/// capped at 5000.
///
/// Returns `(script, params)` for use with `Storage::run_with` /
/// `Storage::run_with_immutable`; `params` holds the `seed` binding, so the
/// seed text itself never appears in the script.
pub fn call_chain(seed: &str, depth: u32) -> (String, BTreeMap<String, DataValue>) {
    let bound = clamp_depth(depth);
    let script = format!(
        concat!(
            // Base case: direct callees of the seed.
            "reach[id, dist] := *Calls{{src: $seed, dst: id}}, dist = 1\n",
            // Recursive case: callees of an already-reached node, one hop further.
            "reach[id, dist] := reach[m, d], *Calls{{src: m, dst: id}}, ",
            "dist = d + 1, dist <= {depth}\n",
            // Output rule plus ordering and row cap.
            "?[id, dist] := reach[id, dist] :order dist :limit 5000\n",
        ),
        depth = bound,
    );
    (script, seed_params(seed))
}

/// Query selecting `Symbol` rows of kind `function`, `method` or
/// `constructor` that have no inbound `Calls` edge.
///
/// The result is capped by a hard-coded `:limit 1000`. The cap is an
/// intentional safety bound (to avoid OOM on large repos); callers that
/// need a different cap should use `dead_code_with_limit` instead.
pub const DEAD_CODE: &str = r#"
?[id, qname, kind, file, line] :=
    *Symbol{id, qname, kind, file, line},
    (kind = "function" or kind = "method" or kind = "constructor"),
    not *Calls{dst: id}
:limit 1000
"#;

/// Dead-code query with a caller-supplied row cap.
///
/// Selects the same rows as `DEAD_CODE` (function / method / constructor
/// symbols with no inbound `Calls` edge) but with `:limit` set from `limit`.
///
/// `limit` is never rejected: it is clamped into `1..=200_000`, so zero is
/// raised to 1 and larger values are silently lowered to keep query output
/// reasonable. Because `limit` is a `u32`, no caller-controlled string
/// reaches the script text.
///
/// Returns `(script, params)`; `params` is always empty because the script
/// binds no parameters.
pub fn dead_code_with_limit(limit: u32) -> (String, BTreeMap<String, DataValue>) {
    // Largest row cap a caller may request; anything higher is clamped down.
    const MAX_LIMIT: u32 = 200_000;

    let limit = limit.clamp(1, MAX_LIMIT);
    let script = format!(
        "?[id, qname, kind, file, line] :=\n\
             *Symbol{{id, qname, kind, file, line}},\n\
             (kind = \"function\" or kind = \"method\" or kind = \"constructor\"),\n\
             not *Calls{{dst: id}}\n\
         :limit {limit}\n"
    );
    (script, BTreeMap::new())
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The seed must travel as a bound parameter, never as script text.
    #[test]
    fn blast_uses_seed_param_not_interpolation() {
        let (q, p) = blast_radius("a\"b\nseparator", 2);
        // No string interpolation of seed — `$seed` is a param placeholder
        // and the literal value lives in `p`.
        assert!(q.contains("$seed"));
        assert!(!q.contains("a\"b"));
        // The part of the seed after the embedded newline must not leak either.
        assert!(!q.contains("separator"));
        assert!(q.contains("dist <= 2"));
        assert_eq!(p.get("seed"), Some(&DataValue::from("a\"b\nseparator")));
    }

    /// The forward traversal carries both the depth bound and the seed binding.
    #[test]
    fn call_chain_includes_depth_and_seed_param() {
        let (q, p) = call_chain("seed", 5);
        assert!(q.contains("dist <= 5"));
        assert!(q.contains("$seed"));
        assert_eq!(p.get("seed"), Some(&DataValue::from("seed")));
    }

    /// Out-of-range depths are coerced into `1..=MAX_DEPTH`.
    #[test]
    fn depth_is_clamped() {
        // Way above MAX_DEPTH → clamped to MAX_DEPTH.
        let (q, _) = blast_radius("x", 9999);
        assert!(q.contains(&format!("dist <= {MAX_DEPTH}")));
        // Zero is invalid; clamp to 1.
        let (q, _) = call_chain("x", 0);
        assert!(q.contains("dist <= 1"));
    }

    /// The dead-code row cap is coerced into `1..=200_000` and the query
    /// binds no parameters.
    #[test]
    fn dead_code_limit_is_clamped() {
        // Zero is raised to the minimum of one row.
        let (q, p) = dead_code_with_limit(0);
        assert!(q.ends_with(":limit 1\n"));
        assert!(p.is_empty());
        // In-range values pass through untouched.
        let (q, _) = dead_code_with_limit(500);
        assert!(q.ends_with(":limit 500\n"));
        // Oversized values are lowered to the ceiling.
        let (q, _) = dead_code_with_limit(u32::MAX);
        assert!(q.ends_with(":limit 200000\n"));
    }
}