ckg-storage 1.0.8

CozoDB-backed storage layer for ckg (per-repo + registry DBs).
Documentation
//! `RegistryStorage` — shared cross-repo DB handle.
//!
//! Lives next to `Storage` rather than in its own crate because it
//! shares the Cozo dependency, the engine constant, and the
//! `run_idempotent` helper. Extracted from `mod.rs` (CR-ast-graph
//! follow-up) so the parent file can stay close to 200 LOC.

use std::collections::BTreeMap;
use std::path::{Path, PathBuf};

use ckg_core::Result;
use cozo::{DataValue, DbInstance, ScriptMutability};

use crate::schema::REGISTRY_DDL;

use super::lifecycle::{ENGINE, run_idempotent};
use super::map_err;

/// Registry DB handle (cross-repo).
///
/// Created by [`RegistryStorage::open`], which places the database in a
/// `registry` directory under the caller-supplied base path.
pub struct RegistryStorage {
    /// Directory holding the registry database (`<base>/registry`).
    db_path: PathBuf,
    /// Open Cozo instance backing the registry.
    db: DbInstance,
}

impl RegistryStorage {
    /// Open (creating if necessary) the registry DB under `base`.
    ///
    /// The database lives in `<base>/registry`; the directory is created when
    /// missing. After opening, every statement in `REGISTRY_DDL` is applied
    /// through `run_idempotent`.
    ///
    /// # Errors
    ///
    /// Returns an error if the directory cannot be created, the Cozo instance
    /// cannot be opened, or a DDL statement fails.
    pub fn open(base: &Path) -> Result<Self> {
        let db_path = base.join("registry");
        std::fs::create_dir_all(&db_path)?;
        // `"{}"` is the engine options string: an empty JSON object.
        let db = DbInstance::new(ENGINE, &db_path, "{}").map_err(map_err)?;
        for stmt in REGISTRY_DDL {
            run_idempotent(&db, stmt)?;
        }
        Ok(Self { db_path, db })
    }

    /// Directory that holds the registry database.
    pub fn db_path(&self) -> &Path {
        &self.db_path
    }

    /// Borrow the underlying Cozo instance.
    pub fn db(&self) -> &DbInstance {
        &self.db
    }

    /// Upsert one repo row.
    ///
    /// The row is keyed by `repo_id`; `db_path`, `root_path`, `head_sha`,
    /// `last_scanned` and `languages` are stored as its value columns.
    /// All values are passed to the script through the `$rows` parameter
    /// rather than being spliced into the query text.
    ///
    /// # Errors
    ///
    /// Returns an error if the `:put` script fails.
    pub fn put_repo(
        &self,
        repo_id: &str,
        db_path: &str,
        root_path: &str,
        head_sha: &str,
        last_scanned: &str,
        languages: &[&str],
    ) -> Result<()> {
        let langs = DataValue::List(languages.iter().map(|l| DataValue::from(*l)).collect());
        // Column order here must match the `?[...]` header in the script below.
        let row = DataValue::List(vec![
            DataValue::from(repo_id),
            DataValue::from(db_path),
            DataValue::from(root_path),
            DataValue::from(head_sha),
            DataValue::from(last_scanned),
            langs,
        ]);
        let mut params = BTreeMap::new();
        params.insert("rows".into(), DataValue::List(vec![row]));
        self.db
            .run_script(
                "?[repo_id, db_path, root_path, head_sha, last_scanned, languages] <- $rows\n\
                 :put Repo {repo_id => db_path, root_path, head_sha, last_scanned, languages}",
                params,
                ScriptMutability::Mutable,
            )
            .map_err(map_err)?;
        Ok(())
    }

    /// List all registered repos as `(repo_id, root_path, head_sha)` rows.
    ///
    /// H6: The query caps at `LIST_REPOS_LIMIT` (100 000) rows. If the result
    /// hits the cap exactly, truncation is observable: a `tracing::warn!` is
    /// emitted so operators can detect the condition via log monitoring. In
    /// practice a real registry will never approach this limit; the cap exists
    /// to prevent OOM on a corrupted DB, not as a real operational boundary.
    ///
    /// Rows whose `repo_id` or `root_path` is not a string are skipped, and a
    /// single warning reports how many were dropped. A non-string `head_sha`
    /// is returned as an empty string.
    ///
    /// # Errors
    ///
    /// Returns an error if the query fails.
    pub fn list_repos(&self) -> Result<Vec<(String, String, String)>> {
        const LIST_REPOS_LIMIT: usize = 100_000;
        // Derive the `:limit` clause from the constant so the cap enforced by
        // the query and the cap checked below cannot drift apart. Braces that
        // belong to the Cozo script are doubled to escape them in `format!`.
        let script = format!(
            "?[id, root, head] := *Repo{{repo_id: id, root_path: root, head_sha: head}} \
             :limit {LIST_REPOS_LIMIT}"
        );
        let rows = self
            .db
            .run_script(&script, BTreeMap::new(), ScriptMutability::Immutable)
            .map_err(map_err)?;
        if rows.rows.len() == LIST_REPOS_LIMIT {
            tracing::warn!(
                "list_repos: result hit the {LIST_REPOS_LIMIT}-row cap — \
                 some repos may be silently omitted. If the registry is legitimately \
                 this large, raise LIST_REPOS_LIMIT in registry.rs."
            );
        }
        let mut out = Vec::with_capacity(rows.rows.len());
        let mut skipped = 0usize;
        for r in rows.rows {
            let mut it = r.into_iter();
            // `repo_id` and `root_path` are required; a row without both as
            // strings is unusable and is dropped (but counted).
            let (id, root) = match (it.next(), it.next()) {
                (Some(DataValue::Str(id)), Some(DataValue::Str(root))) => {
                    (id.to_string(), root.to_string())
                }
                _ => {
                    skipped += 1;
                    continue;
                }
            };
            // `head_sha` is best-effort: fall back to an empty string.
            let head = match it.next() {
                Some(DataValue::Str(s)) => s.to_string(),
                _ => String::new(),
            };
            out.push((id, root, head));
        }
        if skipped > 0 {
            tracing::warn!(
                "list_repos: skipped {skipped} row(s) whose repo_id or root_path was not a string"
            );
        }
        Ok(out)
    }
}