ckg-storage 1.1.2

CozoDB-backed storage layer for ckg (per-repo + registry DBs).
Documentation
//! Bulk insert operations: `put_symbols` and `put_edges`.

use std::collections::BTreeMap;

use ckg_core::{Edge, Result, Symbol};
use cozo::{DataValue, ScriptMutability};

use super::map_err;
use super::Storage;

/// Convert a `Symbol` into one Cozo row, returned as a `DataValue::List`.
///
/// Cells are emitted in this fixed order: `id`, `qname`, `name`, `kind`,
/// `file`, `line`, `col`, `is_public`, `doc`, `hash`. `line` and `col` are
/// cast to `i64` before being wrapped.
pub(super) fn symbol_to_row(s: &Symbol) -> DataValue {
    // Tiny local constructors so the column list below reads as a table.
    let text = |value: &str| DataValue::from(value);
    let int = |value: i64| DataValue::from(value);

    let columns = vec![
        text(s.id.as_str()),
        text(s.qname.as_str()),
        text(s.name.as_str()),
        text(s.kind.as_str()),
        text(s.file.as_str()),
        int(s.line as i64),
        int(s.col as i64),
        DataValue::Bool(s.is_public),
        text(s.doc.as_str()),
        text(s.hash.as_str()),
    ];
    DataValue::List(columns)
}

/// Convert an `Edge` into one Cozo row, returned as a `DataValue::List`.
///
/// The row always starts with `src` and `dst`. When `with_conf` is `true`
/// the edge's confidence (cast to `f64`) is appended as a third cell; this is
/// the shape used for the `Calls`, `Imports`, `Extends`, `Implements` and
/// `Awaits` relations.
pub(super) fn edge_to_row(e: &Edge, with_conf: bool) -> DataValue {
    // Both row shapes share the endpoint columns.
    let mut cells = vec![
        DataValue::from(e.src.as_str()),
        DataValue::from(e.dst.as_str()),
    ];
    if with_conf {
        cells.push(DataValue::from(e.confidence as f64));
    }
    DataValue::List(cells)
}

impl Storage {
    /// Write `symbols` into the `Symbol` relation with `:put`.
    ///
    /// Rows travel through the bound `$rows` parameter instead of being
    /// formatted into the script text, so symbol contents never need quoting
    /// or escaping. The input is submitted in batches of at most 1000 rows,
    /// one `run_script` call per batch.
    ///
    /// # Errors
    ///
    /// Returns the first `run_script` failure, converted with `map_err`;
    /// later batches are not attempted.
    pub fn put_symbols(&self, symbols: &[Symbol]) -> Result<()> {
        const BATCH: usize = 1000;
        const SCRIPT: &str = "
?[id, qname, name, kind, file, line, col, is_public, doc, hash] <- $rows
:put Symbol {id => qname, name, kind, file, line, col, is_public, doc, hash}
";
        for batch in symbols.chunks(BATCH) {
            let rows = batch.iter().map(symbol_to_row).collect::<Vec<DataValue>>();
            let params = BTreeMap::from([("rows".into(), DataValue::List(rows))]);
            self.db
                .run_script(SCRIPT, params, ScriptMutability::Mutable)
                .map_err(map_err)?;
        }
        Ok(())
    }

    /// Write `edges` into their per-kind relations with `:put`.
    ///
    /// Edges are first grouped by the relation name returned from
    /// `EdgeKind::as_relation()`, then each group is submitted in batches of
    /// at most 1000 rows through the bound `$rows` parameter. The `Calls`,
    /// `Imports`, `Extends`, `Implements` and `Awaits` relations are written
    /// with a `confidence` value column; every other relation stores only
    /// `src` and `dst`.
    ///
    /// Grouping uses a `BTreeMap` keyed by relation name. An enum-keyed array
    /// would be marginally faster, but `EdgeKind` has few variants and the
    /// map overhead is negligible (tracking item L5); revisit once `EdgeKind`
    /// gains a stable `#[repr(u8)]` discriminant.
    ///
    /// # Errors
    ///
    /// Returns the first `run_script` failure, converted with `map_err`;
    /// remaining batches and relations are not attempted.
    pub fn put_edges(&self, edges: &[Edge]) -> Result<()> {
        const BATCH: usize = 1000;
        // Relations whose rows carry a trailing `confidence` column.
        const WITH_CONFIDENCE: [&str; 5] =
            ["Calls", "Imports", "Extends", "Implements", "Awaits"];

        let mut grouped: BTreeMap<&'static str, Vec<&Edge>> = BTreeMap::new();
        for edge in edges {
            grouped
                .entry(edge.kind.as_relation())
                .or_default()
                .push(edge);
        }

        for (relation, members) in grouped {
            let with_conf = WITH_CONFIDENCE.contains(&relation);
            // Column list for the `?[...]` head and the key/value spec for `:put`.
            let (columns, put_spec) = if with_conf {
                ("src, dst, confidence", "src, dst => confidence")
            } else {
                ("src, dst", "src, dst")
            };
            let script = format!("?[{columns}] <- $rows\n:put {relation} {{{put_spec}}}\n");

            for batch in members.chunks(BATCH) {
                let rows = batch
                    .iter()
                    .map(|edge| edge_to_row(edge, with_conf))
                    .collect::<Vec<DataValue>>();
                let params = BTreeMap::from([("rows".into(), DataValue::List(rows))]);
                self.db
                    .run_script(&script, params, ScriptMutability::Mutable)
                    .map_err(map_err)?;
            }
        }
        Ok(())
    }
}