ckg-storage 1.1.2

CozoDB-backed storage layer for ckg (per-repo + registry DBs).
Documentation
//! Cozo Datalog DDL. Run on first open; subsequent opens swallow `relation exists`.

/// Current per-repo schema version. Bump on any breaking schema change so
/// `Storage::open` can warn users to rebuild.
///
/// When bumping, update [`SCHEMA_VERSION_STR`] in the same change; a
/// compile-time check below rejects builds where the two disagree.
///
/// **v3** added the `Symbol:by_name` secondary index intended to
/// accelerate cross-file resolution (`resolve_cross_file_calls`).
/// **Empirical finding** (cozo-ce 0.7.13, verified by
/// `tests/perf_spike.rs::resolve_query_plan_uses_join_optimization`):
/// the planner does NOT use this index for the
/// `*Symbol{name, ...}, needle_set[name]` join shape — it picks
/// `mem_prefix_join` instead, joining the bound needle_set against
/// the full Symbol scan output. Wall-clock perf is still good
/// (<500ms for 100k symbols × 1k unresolved Calls in
/// `resolve_cross_file_calls_uses_name_index`) so the user-visible
/// behavior is fine, but the index is currently architectural
/// overhead. Kept in DDL for forward-compat: future Cozo versions
/// may pick it up. Removing would require another schema bump.
// CORE-C1: bumped from 3 → 4 for node_id backtick-escape migration.
pub const SCHEMA_VERSION: u32 = 4;

/// String form of [`SCHEMA_VERSION`] (L7). Pre-computed to avoid repeated
/// `.to_string()` calls in `stamp_schema_version` on every `Storage::open`.
///
/// Must be the canonical decimal rendering of [`SCHEMA_VERSION`] (no sign,
/// no leading zeros); this is enforced at compile time.
pub const SCHEMA_VERSION_STR: &str = "4";

// Compile-time drift guard: the two constants above are maintained by hand,
// so parse the string form as decimal and fail the build if it does not
// match the numeric form. Evaluated entirely at compile time; no runtime cost.
const _: () = {
    let digits = SCHEMA_VERSION_STR.as_bytes();
    assert!(!digits.is_empty(), "SCHEMA_VERSION_STR must not be empty");
    assert!(
        digits.len() == 1 || digits[0] != b'0',
        "SCHEMA_VERSION_STR must not have leading zeros"
    );

    let mut parsed: u32 = 0;
    let mut i = 0;
    while i < digits.len() {
        assert!(
            digits[i].is_ascii_digit(),
            "SCHEMA_VERSION_STR must contain only ASCII digits"
        );
        // Widening u8 -> u32 cast; overflow here is a compile error.
        parsed = parsed * 10 + (digits[i] - b'0') as u32;
        i += 1;
    }

    assert!(
        parsed == SCHEMA_VERSION,
        "SCHEMA_VERSION_STR is out of sync with SCHEMA_VERSION"
    );
};

/// DDL statements for a per-repo database: stored relations first, then
/// their indexes.
///
/// The statements are listed in execution order, and an index must appear
/// AFTER the relation it references. `::index create` is system-rule
/// syntax; re-running the list against an existing DB is idempotent
/// because `run_idempotent` swallows `relation already exists`-style
/// errors.
pub const PER_REPO_DDL: &[&str] = &[
    // --- Relations keyed by a single id/path column ---
    ":create Symbol {id: String => \
     qname: String, name: String, kind: String, file: String, \
     line: Int, col: Int, is_public: Bool, doc: String, hash: String}",
    ":create File {path: String => language: String, hash: String}",
    ":create Doc {id: String => body: String}",
    ":create Route {id: String => method: String, path: String, handler: String}",
    // --- Relations keyed by (src, dst), carrying a confidence value ---
    ":create Calls {src: String, dst: String => confidence: Float}",
    ":create Imports {src: String, dst: String => confidence: Float}",
    ":create Extends {src: String, dst: String => confidence: Float}",
    ":create Implements {src: String, dst: String => confidence: Float}",
    // --- Relations keyed by (src, dst) with no value columns ---
    ":create Defines {src: String, dst: String}",
    ":create Documents {src: String, dst: String}",
    ":create Tests {src: String, dst: String}",
    // `await` call sites; the confidence column mirrors `Calls`.
    ":create Awaits {src: String, dst: String => confidence: Float}",
    // One 768-dim embedding per symbol (nomic-embed-text-v1.5). Stored as a
    // typed F32 vector so that the HNSW index (declared via `::hnsw create`
    // once the first row has been inserted) can serve sub-millisecond
    // nearest-neighbor queries.
    ":create Embedding {id: String => vec: <F32; 768>}",
    // Free-form key/value store: schema version, last-resolver run, etc.
    ":create Meta {key: String => value: String}",
    // Secondary index on Symbol.name, intended to back the I8
    // `needle_set[name]` join in `resolve_cross_file_calls` so that lookup
    // cost scales with needle count rather than total symbols. Per the
    // empirical finding recorded on `SCHEMA_VERSION`, the cozo-ce 0.7.13
    // planner does not currently pick this index for that join shape; it
    // stays in the DDL for forward-compat, and removing it would require a
    // schema bump.
    "::index create Symbol:by_name {name}",
];

/// DDL statements for the registry database, which holds cross-repo
/// relations.
///
/// Statements are listed in the order they are declared here; each one
/// creates a single stored relation.
pub const REGISTRY_DDL: &[&str] = &[
    // One row per repo, keyed by `repo_id`.
    ":create Repo {repo_id: String => \
     db_path: String, root_path: String, head_sha: String, \
     last_scanned: String, languages: [String]}",
    // One row per symbol, keyed by `gid`; carries a 768-dim F32 embedding.
    ":create GlobalSymbol {gid: String => \
     qname: String, repo_id: String, local_node_id: String, kind: String, \
     embedding: <F32; 768>}",
    // Cross-repo relations keyed by (src, dst), each with a confidence value.
    ":create CROSS_CALLS {src: String, dst: String => confidence: Float}",
    ":create CROSS_IMPORTS {src: String, dst: String => confidence: Float}",
    ":create CROSS_EXTENDS {src: String, dst: String => confidence: Float}",
];