// mnem-cli 0.1.3
//
// Command-line interface for mnem - git for knowledge graphs.
use super::*;

/// Default `.mnem/config.toml` body written by `mnem init` so a fresh
/// `mnem retrieve "x"` succeeds out-of-the-box. Idempotent: if
/// `config.toml` already exists, init does NOT overwrite it.
///
/// Background: previously `mnem init` created the repo skeleton but
/// never wrote `[embed]`, so a first-time retrieve errored cryptically.
/// Seeding sensible defaults is the smallest fix that turns the
/// box-fresh path into a working demo.
///
/// This is the variant compiled under the `bundled-embedder` cargo
/// feature (Path A - `cargo install mnem-cli --features bundled-embedder`):
/// the seeded default is onnx + all-MiniLM-L6-v2. The model downloads on
/// first use and caches under `~/.cache/huggingface/hub`; no Ollama
/// install, no OpenAI key, no manual `mnem config set embed.provider ...`
/// step. Default builds (no `bundled-embedder`) get the Ollama seed
/// instead, so users with a local `ollama serve` keep working unchanged.
///
/// Note: the `#`-prefixed lines inside the literal are TOML comments that
/// end up in the user's config file; they are part of the emitted text.
#[cfg(feature = "bundled-embedder")]
const DEFAULT_CONFIG_TOML: &str = "\
# Auto-generated by `mnem init`. Override with `mnem config set <key> <val>`
# or edit this file. See https://github.com/Uranid/mnem/blob/main/CONFIGURABLES.md
[embed]
provider = \"onnx\"
model = \"all-MiniLM-L6-v2\"
";

/// Default `.mnem/config.toml` body written by `mnem init` for builds
/// WITHOUT the `bundled-embedder` cargo feature: seeds `[embed]` with the
/// Ollama provider and the `nomic-embed-text` model.
///
/// Exactly one of the two `DEFAULT_CONFIG_TOML` definitions is compiled,
/// selected by the `bundled-embedder` feature flag. An existing
/// `config.toml` is never overwritten with this text.
#[cfg(not(feature = "bundled-embedder"))]
const DEFAULT_CONFIG_TOML: &str = "\
# Auto-generated by `mnem init`. Override with `mnem config set <key> <val>`
# or edit this file. See https://github.com/Uranid/mnem/blob/main/CONFIGURABLES.md
[embed]
provider = \"ollama\"
model = \"nomic-embed-text\"
";

/// Human-readable provider tag shown in `mnem init`'s stdout hint so
/// the operator sees the exact provider their fresh repo will use.
///
/// Format is `<provider>:<model>`. `run` splits on the first `:` to
/// recover the bare provider name for its "set embed.provider manually"
/// warning, so keep the provider first and keep the separator.
/// This is the `bundled-embedder` variant.
#[cfg(feature = "bundled-embedder")]
const DEFAULT_PROVIDER_DESC: &str = "onnx:all-MiniLM-L6-v2";

/// Human-readable `<provider>:<model>` tag for builds without the
/// `bundled-embedder` feature. Same contract as the bundled variant:
/// the text before the first `:` is treated as the provider name.
#[cfg(not(feature = "bundled-embedder"))]
const DEFAULT_PROVIDER_DESC: &str = "ollama:nomic-embed-text";

// Command-line arguments for `mnem init`.
//
// NOTE: plain `//` comments are used here on purpose. With clap's derive
// macros, `///` doc comments on the struct and its fields are turned into
// help text, so editing them changes what `--help` prints. The
// `after_long_help` string below is likewise user-facing output.
#[derive(clap::Args, Debug)]
#[command(after_long_help = "\
Examples:
 mnem init # create .mnem/ in the current directory
 mnem init ~/notes # create ~/notes/.mnem/
 mnem -R ~/notes status # verify by reading op_id + head commit
")]
pub(crate) struct Args {
    // Optional target directory (see the `mnem init ~/notes` example
    // above). When `None`, `run` falls back to the process's current
    // working directory. The `///` line below is the help text for it.
    /// Directory to initialise. Defaults to the current directory.
    pub path: Option<std::path::PathBuf>,
}

/// Entry point for `mnem init`: creates a brand-new repository at
/// `<target>/.mnem`, seeds a default `config.toml` and the anchor node,
/// prints a short summary to stdout, and registers the repo globally.
///
/// * `_override` - not read by this function (kept for the shared
///   subcommand signature).
/// * `args.path` - directory to initialise; when absent the current
///   working directory is used.
///
/// # Errors
///
/// * the current working directory cannot be determined and no explicit
///   path was given;
/// * `<target>/.mnem` already exists (this command never re-initialises);
/// * creating the stores or initialising the repo fails.
///
/// Failing to write the default config is NOT an error: a warning goes to
/// stderr and init carries on.
pub(crate) fn run(_override: Option<&Path>, args: Args) -> Result<()> {
    // Resolve the target directory. A failed `current_dir()` used to be
    // mapped to an empty path, which silently produced a relative `.mnem`
    // and handed an empty path to the global registry. Surface it instead.
    let target = match args.path {
        Some(dir) => dir,
        None => match std::env::current_dir() {
            Ok(cwd) => cwd,
            Err(e) => bail!(
                "cannot determine the current directory: {}\n\
 hint: pass the directory to initialise explicitly, e.g. `mnem init <path>`.",
                e
            ),
        },
    };
    let data_dir = target.join(repo::MNEM_DIR);

    if data_dir.exists() {
        bail!(
            "already a mnem repository at {}\n\
 hint: remove the existing `.mnem/` if you meant a fresh repo, or run \
 `mnem status` to inspect it. See docs/RUNBOOK.md#6-mid-commit-crash-recovery \
 for recovery semantics.",
            data_dir.display()
        );
    }
    let (bs, ohs) = repo::create_or_open_stores(&data_dir)?;
    let r = ReadonlyRepo::init(bs, ohs)?;

    // Seed `.mnem/config.toml` with the build's default embedder
    // (`DEFAULT_PROVIDER_DESC`: bundled onnx under the `bundled-embedder`
    // feature, a local Ollama otherwise) so a fresh `mnem retrieve` works
    // out-of-the-box. Idempotent: if the file already exists (e.g. a
    // parent process pre-seeded it), do NOT overwrite. The data dir was
    // created above, so we know it exists.
    let cfg_path = config::path_of(&data_dir);
    if !cfg_path.exists() {
        // Best-effort: a write failure here is not fatal (the repo
        // skeleton is already on disk), so we print a stderr hint
        // and proceed rather than rolling back the init.
        match std::fs::write(&cfg_path, DEFAULT_CONFIG_TOML) {
            Ok(()) => {
                println!(
                    " seeded config: {} ([embed] {})",
                    cfg_path.display(),
                    DEFAULT_PROVIDER_DESC
                );
            }
            Err(e) => {
                // `DEFAULT_PROVIDER_DESC` is `<provider>:<model>`; only the
                // provider part belongs in the suggested command.
                eprintln!(
                    "(warn: failed to seed {}: {e}; run `mnem config set embed.provider {}` manually)",
                    cfg_path.display(),
                    DEFAULT_PROVIDER_DESC.split(':').next().unwrap_or("ollama")
                );
            }
        }
    }

    // Non-fatal by design: the helper swallows store/embed failures.
    seed_anchor_node(&r, &data_dir);

    println!("initialised mnem repo in {}", data_dir.display());
    println!(" root op: {}", r.op_id());

    // Best-effort: register this repo in ~/.mnemglobal/repos.toml so
    // `mnem` without -R can discover it. Silent if ~/.mnemglobal doesn't
    // exist yet (user hasn't run `mnem integrate`).
    crate::global::register_repo(&target);

    Ok(())
}

/// Quiet counterpart of `mnem init`: makes sure `parent/.mnem` exists
/// without printing anything itself.
///
/// If `parent/.mnem` is already present the function returns `Ok(())`
/// straight away; the existing directory is neither modified nor
/// validated. Otherwise it creates the stores, initialises the repo,
/// writes `DEFAULT_CONFIG_TOML` when no config file is present (a failed
/// write is deliberately ignored), and seeds the anchor node.
///
/// Differences from `run`: an existing directory is not an error, no
/// progress lines are printed, and the repo is not added to the global
/// registry.
///
/// NOTE(review): callers live outside this file (presumably
/// `global::bootstrap`, for `~/.mnemglobal`) - confirm before changing
/// the contract.
///
/// # Errors
///
/// Propagates failures from `repo::create_or_open_stores` and
/// `ReadonlyRepo::init`.
pub(crate) fn init_mnem_dir(parent: &Path) -> Result<()> {
    let mnem_dir = parent.join(repo::MNEM_DIR);

    // Only a missing directory triggers any work; this is what makes
    // repeated calls harmless.
    if !mnem_dir.exists() {
        let (blocks, op_heads) = repo::create_or_open_stores(&mnem_dir)?;
        let fresh_repo = ReadonlyRepo::init(blocks, op_heads)?;

        let config_file = config::path_of(&mnem_dir);
        if !config_file.exists() {
            // Best-effort seed: the repo is usable without it, and this
            // path must stay silent, so the result is dropped.
            let _ = std::fs::write(&config_file, DEFAULT_CONFIG_TOML);
        }

        seed_anchor_node(&fresh_repo, &mnem_dir);
    }

    Ok(())
}

/// Fixed UUID for the Meta anchor node so `content_cid` is deterministic
/// across any two fresh repos that ingest the same user data.
///
/// History (audit-2026-04-25, P0-1): using `new_v7()` here made every init
/// produce a different node ID, which propagated into the node-tree root
/// CID and ultimately into `content_cid`, breaking the determinism
/// invariant.
///
/// Layout of the literal: the version nibble is `7` and the variant nibble
/// is `8`, so it has the shape of a v7 UUID; the final group `6d6e656d0001`
/// is ASCII "mnem" followed by `0001`. It is parsed (and `expect`ed to be
/// valid) in `seed_anchor_node`, so it must remain a well-formed UUID.
const ANCHOR_NODE_ID: &str = "00000000-0000-7000-8000-6d6e656d0001";

/// Write the fixed "Meta" anchor node into a freshly-initialised repo and
/// commit it.
///
/// The node uses `ANCHOR_NODE_ID`, carries a one-line summary and a
/// `name = "mnem"` property. If a config can be loaded from `data_dir`,
/// an embedder resolved and opened, and the summary embedded, the
/// resulting vector is attached to the node before the commit.
///
/// Non-fatal by design: a failed `add_node` aborts quietly, any failure on
/// the embedding path just skips the embedding, and the results of
/// `set_embedding` and `commit` are discarded, so this never blocks
/// `mnem init` or `mnem integrate`. The intent is to give the embedder a
/// warm-up write and make the graph non-empty from the start, so that
/// `mnem global retrieve` has something to return without a manual
/// `mnem reindex`.
///
/// # Panics
///
/// Only if `ANCHOR_NODE_ID` is not a valid UUID, which would be a
/// programming error in the constant.
fn seed_anchor_node(repo: &ReadonlyRepo, data_dir: &std::path::Path) {
    // Single source of truth: the stored summary and the embedded text
    // must be the same string.
    const ANCHOR_SUMMARY: &str = "mnem is a persistent knowledge graph.";

    let anchor_id = NodeId::parse_uuid(ANCHOR_NODE_ID)
        .expect("ANCHOR_NODE_ID is a valid UUID; this is a compile-time constant");
    let anchor = Node::new(anchor_id, "Meta")
        .with_summary(ANCHOR_SUMMARY)
        .with_prop(String::from("name"), Ipld::String(String::from("mnem")));

    let mut tx = repo.start_transaction();
    let node_cid = match tx.add_node(&anchor) {
        Err(_) => return,
        Ok(cid) => cid,
    };

    // Optional embedding step. Each `?` bails out of the closure only, so
    // any missing piece (config, provider, embedder, vector) simply means
    // the node is committed without an embedding.
    let _ = (|| -> Option<()> {
        let cfg = config::load(data_dir).ok()?;
        let provider_cfg = config::resolve_embedder(&cfg)?;
        let embedder = mnem_embed_providers::open(&provider_cfg).ok()?;
        let vector = embedder.embed(ANCHOR_SUMMARY).ok()?;

        let model = embedder.model().to_string();
        let embedding = mnem_embed_providers::to_embedding(&model, &vector);
        let _ = tx.set_embedding(node_cid, model, embedding);
        Some(())
    })();

    let _ = tx.commit("mnem init", "seed anchor node");
}