sqlite-graphrag 1.0.78

Persistent GraphRAG memory for Claude Code, Codex, Cursor, and 24+ AI agents in a single 6 MB Rust binary. LLM-only and one-shot in v1.0.78: every `remember` / `ingest` spawns a headless Claude Code or Codex subprocess (OAuth, no MCP, no hooks). No daemon. No ONNX runtime. No model download. Graph-native retrieval with FTS5 + cosine + multi-hop traversal. OAuth-only enforcement: API keys ABORT the spawn.
Documentation
#![cfg(feature = "slow-tests")]

//! End-to-end coverage for the `--low-memory` flag and the
//! `SQLITE_GRAPHRAG_LOW_MEMORY` env var on the `ingest` subcommand.
//!
//! Each test owns an isolated `TempDir` and points the binary at it through
//! `SQLITE_GRAPHRAG_DB_PATH` and `SQLITE_GRAPHRAG_CACHE_DIR`. The
//! `--skip-memory-guard` flag prevents the daemon autostart path during
//! parallel test runs. `#[serial]` is mandatory because the binary artefact
//! is shared and the env var these tests manipulate is process-global.

use assert_cmd::prelude::*;
use serde_json::Value;
use serial_test::serial;
use std::process::Command;
use tempfile::TempDir;

/// Creates a `Command` for the `sqlite-graphrag` binary with the mock LLM
/// directory placed on `PATH`.
///
/// As of v1.0.76 the binary spawns `claude` or `codex` on every
/// `remember` / `ingest` / `edit`. The mocks bundled under
/// `tests/mock-llm/` answer with a fixed 384-dim zero vector, which lets
/// the binary complete without a real OAuth login. The mock directory is
/// intentionally leaked (no `TempDir` cleanup) so that it is still present
/// when the subprocess looks the mocks up.
///
/// # Panics
///
/// Panics if the `sqlite-graphrag` binary cannot be located.
fn sgr_cmd() -> Command {
    let mocks = common::mock_llm_path();
    let mut cmd =
        Command::cargo_bin("sqlite-graphrag").expect("sqlite-graphrag binary not found");
    let search_path = common::prepend_path(&mocks);
    cmd.env("PATH", search_path);
    cmd
}

#[path = "common/mod.rs"]
mod common;

/// Prepares a command whose database and cache live inside `temp`.
///
/// The returned command has the DB path, cache dir and namespace
/// (`global`) set through environment variables, has
/// `SQLITE_GRAPHRAG_LOW_MEMORY` removed, and already carries the
/// `--skip-memory-guard` argument. Callers append the subcommand and its
/// arguments.
fn ingest_cmd(temp: &TempDir) -> Command {
    let mut command = sgr_cmd();
    let root = temp.path();
    command
        .env("SQLITE_GRAPHRAG_DB_PATH", root.join("graphrag.sqlite"))
        .env("SQLITE_GRAPHRAG_CACHE_DIR", root.join("cache"))
        .env("SQLITE_GRAPHRAG_NAMESPACE", "global")
        // Drop any value inherited from the host shell so runs stay deterministic.
        .env_remove("SQLITE_GRAPHRAG_LOW_MEMORY")
        .arg("--skip-memory-guard");
    command
}

/// Runs the `init` subcommand against the database and cache inside `temp`.
///
/// `SQLITE_GRAPHRAG_LOW_MEMORY` is removed from the child environment
/// before the call.
///
/// # Panics
///
/// Panics (through the `assert_cmd` assertion) when `init` does not exit
/// successfully.
fn init_db(temp: &TempDir) {
    let root = temp.path();
    sgr_cmd()
        .env("SQLITE_GRAPHRAG_DB_PATH", root.join("graphrag.sqlite"))
        .env("SQLITE_GRAPHRAG_CACHE_DIR", root.join("cache"))
        .env_remove("SQLITE_GRAPHRAG_LOW_MEMORY")
        .args(["--skip-memory-guard", "init"])
        .assert()
        .success();
}

/// Creates `<temp>/corpus` holding two small Markdown files and returns
/// the directory path.
///
/// # Panics
///
/// Panics if the directory or either file cannot be written.
fn write_corpus(temp: &TempDir) -> std::path::PathBuf {
    let corpus = temp.path().join("corpus");
    std::fs::create_dir_all(&corpus).expect("create corpus dir");

    // (file name, contents, panic message on write failure)
    let fixtures = [
        ("a.md", "# Alpha\n\nContent of file alpha.", "write a.md"),
        ("b.md", "# Beta\n\nContent of file beta.", "write b.md"),
    ];
    for (file_name, contents, failure) in fixtures {
        std::fs::write(corpus.join(file_name), contents).expect(failure);
    }
    corpus
}

/// Locates the final NDJSON summary line emitted by `ingest`.
fn parse_summary(stdout: &[u8]) -> Value {
    let lines: Vec<Value> = String::from_utf8_lossy(stdout)
        .lines()
        .filter(|l| !l.trim().is_empty())
        .map(|l| serde_json::from_str::<Value>(l).expect("ndjson line is valid JSON"))
        .collect();
    lines
        .into_iter()
        .rev()
        .find(|v| v.get("summary").and_then(|x| x.as_bool()).unwrap_or(false))
        .expect("summary line missing")
}

/// `ingest --low-memory` over a two-file corpus must exit successfully and
/// report two succeeded files and zero failed files in its summary record.
#[test]
#[serial]
fn ingest_low_memory_flag_succeeds() {
    let workspace = TempDir::new().unwrap();
    init_db(&workspace);
    let corpus = write_corpus(&workspace);

    let mut cmd = ingest_cmd(&workspace);
    let argv = [
        "ingest",
        corpus.to_str().unwrap(),
        "--type",
        "document",
        "--pattern",
        "*.md",
        "--low-memory",
    ];
    let outcome = cmd.args(argv).assert().success();

    let summary = parse_summary(&outcome.get_output().stdout);
    let succeeded = summary["files_succeeded"].as_u64().unwrap();
    assert_eq!(succeeded, 2, "summary: {summary}");
    let failed = summary["files_failed"].as_u64().unwrap();
    assert_eq!(failed, 0);
}

/// Passing `--ingest-parallelism` together with `--low-memory` must make
/// `ingest` fail and mention the conflict on stderr.
#[test]
#[serial]
fn ingest_low_memory_rejects_explicit_parallelism() {
    let workspace = TempDir::new().unwrap();
    init_db(&workspace);
    let corpus = write_corpus(&workspace);

    let mut cmd = ingest_cmd(&workspace);
    let argv = [
        "ingest",
        corpus.to_str().unwrap(),
        "--type",
        "document",
        "--pattern",
        "*.md",
        "--low-memory",
        "--ingest-parallelism",
        "4",
    ];
    let outcome = cmd.args(argv).assert().failure();

    let stderr = String::from_utf8_lossy(&outcome.get_output().stderr);
    let mentions_conflict = stderr.contains("conflicts with --low-memory");
    assert!(
        mentions_conflict,
        "stderr must announce the conflict; got:\n{stderr}"
    );
}

/// Setting `SQLITE_GRAPHRAG_LOW_MEMORY=1` (without the CLI flag) must make
/// a verbose `ingest` run announce env-driven low-memory mode on stderr and
/// still ingest both corpus files.
#[test]
#[serial]
fn ingest_env_var_low_memory_activates_mode() {
    let workspace = TempDir::new().unwrap();
    init_db(&workspace);
    let corpus = write_corpus(&workspace);

    let mut cmd = ingest_cmd(&workspace);
    // Re-set the variable that `ingest_cmd` removed from the child environment.
    cmd.env("SQLITE_GRAPHRAG_LOW_MEMORY", "1");
    let argv = [
        "-v",
        "ingest",
        corpus.to_str().unwrap(),
        "--type",
        "document",
        "--pattern",
        "*.md",
    ];
    let outcome = cmd.args(argv).assert().success();

    let stderr = String::from_utf8_lossy(&outcome.get_output().stderr);
    let announced = stderr.contains("low-memory mode enabled via SQLITE_GRAPHRAG_LOW_MEMORY");
    assert!(
        announced,
        "stderr must announce env-driven low-memory mode; got:\n{stderr}"
    );

    let summary = parse_summary(&outcome.get_output().stdout);
    let succeeded = summary["files_succeeded"].as_u64().unwrap();
    assert_eq!(succeeded, 2);
}