Skip to main content

sqlite_graphrag/
cli.rs

1//! CLI argument structs and command surface (clap-based).
2//!
3//! Defines `Cli` and all subcommand enums; contains no business logic.
4
5use crate::commands::*;
6use crate::i18n::{current, Language};
7use clap::{Parser, Subcommand};
8
/// Computes the upper bound that `--max-concurrency` is validated against.
///
/// The bound is twice the parallelism reported by
/// `std::thread::available_parallelism`; when that query fails, a fixed
/// ceiling of 8 is used instead.
fn max_concurrency_ceiling() -> usize {
    match std::thread::available_parallelism() {
        Ok(parallelism) => parallelism.get() * 2,
        Err(_) => 8,
    }
}
15
16#[derive(Copy, Clone, Debug, PartialEq, Eq, clap::ValueEnum)]
17pub enum GraphExportFormat {
18    Json,
19    Dot,
20    Mermaid,
21    /// Stream one JSON object per entity, then one per edge, then a summary line.
22    Ndjson,
23}
24
25/// v1.0.82 (GAP-003): LLM backend for embedding. Accepts `auto` (default —
26/// detects `codex` or `claude` on the PATH), `codex` (forces codex exec), `claude`
27/// (forces claude -p), or `none` (skips embedding; useful for tests).
28#[derive(Copy, Clone, Debug, PartialEq, Eq, clap::ValueEnum)]
29pub enum LlmBackendChoice {
30    Auto,
31    Claude,
32    Codex,
33    Opencode,
34    OpenRouter,
35    None,
36}
37
38/// v1.0.93: embedding backend selector. Separate from `--llm-backend` which
39/// controls enrichment (entity extraction, body enrichment) via subprocess.
40/// `auto` tries OpenRouter if API key is available, falls back to LLM subprocess.
41/// `openrouter` requires API key (exit 78 if absent).
42/// `llm` forces subprocess (codex/claude/opencode) — legacy behaviour.
43#[derive(Copy, Clone, Debug, PartialEq, Eq, clap::ValueEnum)]
44pub enum EmbeddingBackendChoice {
45    Auto,
46    Openrouter,
47    Llm,
48}
49
50impl EmbeddingBackendChoice {
51    /// v1.0.93: produces a fallback chain that prepends OpenRouter when
52    /// the client is initialised. Falls back to the LLM subprocess chain.
53    pub fn to_chain(self, llm_choice: LlmBackendChoice) -> Vec<crate::embedder::LlmBackendKind> {
54        use crate::embedder::LlmBackendKind;
55        match self {
56            EmbeddingBackendChoice::Openrouter => vec![LlmBackendKind::OpenRouter],
57            EmbeddingBackendChoice::Llm => llm_choice.to_chain(),
58            EmbeddingBackendChoice::Auto => {
59                if crate::embedder::is_openrouter_initialized() {
60                    let mut chain = vec![LlmBackendKind::OpenRouter];
61                    chain.extend(llm_choice.to_chain());
62                    chain
63                } else {
64                    llm_choice.to_chain()
65                }
66            }
67        }
68    }
69}
70
71impl LlmBackendChoice {
72    /// v1.0.82 (GAP-003): converts the CLI choice into an ordered chain
73    /// of backends that `embedder::embed_with_fallback` iterates. The first
74    /// element of the chain is the preferred backend; subsequent elements
75    /// are fallbacks used when the preferred one fails with `LlmBackendError`.
76    ///
77    /// `Auto` produces `[Codex, Claude, None]` — codex is the default since v1.0.76+,
78    /// claude is the fallback if codex fails (OAuth contention, quota), and
79    /// `None` lets `embed_with_fallback` return an empty vector when
80    /// `skip_on_failure` is active.
81    pub fn to_chain(self) -> Vec<crate::embedder::LlmBackendKind> {
82        use crate::embedder::LlmBackendKind;
83        match self {
84            LlmBackendChoice::Codex => vec![LlmBackendKind::Codex, LlmBackendKind::None],
85            LlmBackendChoice::Claude => vec![LlmBackendKind::Claude, LlmBackendKind::None],
86            LlmBackendChoice::Opencode => vec![
87                LlmBackendKind::Opencode,
88                LlmBackendKind::Codex,
89                LlmBackendKind::Claude,
90                LlmBackendKind::None,
91            ],
92            LlmBackendChoice::OpenRouter => vec![
93                LlmBackendKind::OpenRouter,
94                LlmBackendKind::Codex,
95                LlmBackendKind::None,
96            ],
97            LlmBackendChoice::None => vec![LlmBackendKind::None],
98            LlmBackendChoice::Auto => parse_fallback_chain(
99                &std::env::var("SQLITE_GRAPHRAG_LLM_FALLBACK")
100                    .unwrap_or_else(|_| "codex,claude,none".to_string()),
101            ),
102        }
103    }
104}
105
106fn parse_fallback_chain(s: &str) -> Vec<crate::embedder::LlmBackendKind> {
107    use crate::embedder::LlmBackendKind;
108    let mut chain: Vec<LlmBackendKind> = s
109        .split(',')
110        .filter_map(|tok| match tok.trim().to_ascii_lowercase().as_str() {
111            "codex" => Some(LlmBackendKind::Codex),
112            "claude" | "claude-code" => Some(LlmBackendKind::Claude),
113            "opencode" => Some(LlmBackendKind::Opencode),
114            "openrouter" => Some(LlmBackendKind::OpenRouter),
115            "none" => Some(LlmBackendKind::None),
116            _ => {
117                tracing::warn!(
118                    token = tok.trim(),
119                    "unknown backend in --llm-fallback, skipping"
120                );
121                Option::None
122            }
123        })
124        .collect();
125    if chain.is_empty() {
126        chain = vec![
127            LlmBackendKind::Codex,
128            LlmBackendKind::Claude,
129            LlmBackendKind::None,
130        ];
131    }
132    chain
133}
134
135#[derive(Parser)]
136#[command(name = "sqlite-graphrag")]
137#[command(version)]
138#[command(about = "Local GraphRAG memory for LLMs in a single SQLite file")]
139#[command(arg_required_else_help = true)]
140#[command(after_help = "DATABASE PATH (GAP-SG-32):\n  \
141    `--db` is a PER-SUBCOMMAND flag, so it must come AFTER the subcommand:\n    \
142    sqlite-graphrag remember --db ./graphrag.sqlite --name mem --type note ...\n  \
143    Placing it before the subcommand (e.g. `sqlite-graphrag --db x.sqlite remember`) is rejected.\n  \
144    For a position-independent path, set the canonical env var instead:\n    \
145    SQLITE_GRAPHRAG_DB_PATH=./graphrag.sqlite sqlite-graphrag remember --name mem ...")]
146pub struct Cli {
147    /// Maximum number of simultaneous CLI invocations allowed (default: 4).
148    ///
149    /// Caps the counting semaphore used for CLI concurrency slots. The value must
150    /// stay within [1, 2×nCPUs]. Values above the ceiling are rejected with exit 2.
151    #[arg(long, global = true, value_name = "N")]
152    pub max_concurrency: Option<usize>,
153
154    /// Wait up to SECONDS for a free concurrency slot before giving up (exit 75).
155    ///
156    /// Useful in retrying agent pipelines: the process polls every 500 ms until a
157    /// slot opens or the timeout expires. Default: 300s (5 minutes).
158    #[arg(long, global = true, value_name = "SECONDS")]
159    pub wait_lock: Option<u64>,
160
161    /// Skip the available-memory check before loading the model.
162    ///
163    /// Exclusive use in automated tests where real allocation does not occur.
164    #[arg(long, global = true, hide = true, default_value_t = false)]
165    pub skip_memory_guard: bool,
166
167    /// v1.0.83 (ADR-0041): strict env-clear mode for compliance environments.
168    ///
169    /// When enabled, the LLM subprocess receives ONLY `PATH` — no
170    /// `ANTHROPIC_AUTH_TOKEN`, `ANTHROPIC_BASE_URL`, `OPENAI_BASE_URL`
171    /// or other custom-provider credentials are forwarded. Defaults to
172    /// the standard v1.0.83 whitelist that preserves custom-provider
173    /// credentials (ADR-0041). Honors env var
174    /// `SQLITE_GRAPHRAG_STRICT_ENV_CLEAR=1` when set.
175    #[arg(
176        long,
177        global = true,
178        hide = true,
179        default_value_t = false,
180        value_parser = clap::builder::BoolishValueParser::new(),
181        env = "SQLITE_GRAPHRAG_STRICT_ENV_CLEAR"
182    )]
183    pub strict_env_clear: bool,
184
185    /// v1.0.84 (ADR-0042 / GAP-002): resolve and print the LLM backend that
186    /// WOULD be invoked for embedding (binary path + model + flavour),
187    /// then exit 0 without executing the subprocess. Useful for CI
188    /// audit and sanity-check of `--llm-backend` before long sessions.
189    ///
190    /// Honors env var `SQLITE_GRAPHRAG_DRY_RUN_BACKEND=1` when set.
191    #[arg(
192        long,
193        global = true,
194        hide = true,
195        default_value_t = false,
196        value_parser = clap::builder::BoolishValueParser::new(),
197        env = "SQLITE_GRAPHRAG_DRY_RUN_BACKEND"
198    )]
199    pub dry_run_backend: bool,
200
201    /// Language for human-facing stderr messages. Accepts `en` or `pt`.
202    ///
203    /// Without the flag, detection falls back to `SQLITE_GRAPHRAG_LANG` and then
204    /// `LC_ALL`/`LANG`. JSON stdout stays deterministic and identical across
205    /// languages; only human-facing strings are affected.
206    #[arg(long, global = true, value_enum, value_name = "LANG")]
207    pub lang: Option<crate::i18n::Language>,
208
209    /// Time zone for `*_iso` fields in JSON output (for example `America/Sao_Paulo`).
210    ///
211    /// Accepts any IANA time zone name. Without the flag, it falls back to
212    /// `SQLITE_GRAPHRAG_DISPLAY_TZ`; if unset, UTC is used. Integer epoch fields
213    /// are not affected.
214    #[arg(long, global = true, value_name = "IANA")]
215    pub tz: Option<chrono_tz::Tz>,
216
217    /// Increase logging verbosity (-v=info, -vv=debug, -vvv=trace).
218    ///
219    /// Overrides `SQLITE_GRAPHRAG_LOG_LEVEL` env var when present. Logs are emitted
220    /// to stderr; JSON stdout is unaffected.
221    #[arg(short = 'v', long, global = true, action = clap::ArgAction::Count)]
222    pub verbose: u8,
223
224    /// v1.0.75 (G21 solution): extraction backend selector. Accepts
225    /// `llm` (default), `embedding` (legacy), `none`, or `both` (composite).
226    /// The `llm` backend invokes claude code / codex CLI headless to extract
227    /// entities and relationships; `embedding` is a permanent stub since
228    /// v1.0.79 (legacy fastembed pipeline removed) that returns a clear
229    /// migration error.
230    #[arg(long, global = true, value_name = "KIND", default_value = "llm")]
231    pub extraction_backend: Option<String>,
232
233    /// v1.0.79 (G42/S1): embedding dimensionality override (default 64).
234    ///
235    /// Precedence: this flag > `SQLITE_GRAPHRAG_EMBEDDING_DIM` env var >
236    /// the `dim` recorded in the database `schema_meta` > 64. Existing
237    /// databases keep their recorded dimensionality automatically; use
238    /// this flag only to migrate a corpus to a new dimensionality
239    /// (followed by `enrich --operation re-embed`). Range: [8, 4096].
240    #[arg(long, global = true, value_name = "N", value_parser = clap::value_parser!(u64).range(8..=4096))]
241    pub embedding_dim: Option<u64>,
242
243    /// v1.0.82 (GAP-003) / v1.0.84 (ADR-0042): LLM backend for embedding.
244    /// Accepts `auto` (detects via PATH, codex-first), `codex` (forces
245    /// `codex exec`), `claude` (forces `claude -p`; since v1.0.84 does NOT fall back to
246    /// codex — emits `AppError::Validation` if `claude` is absent),
247    /// `opencode` (forces `opencode run`), or `none`
248    /// (skips embedding; useful for tests). Honors the env var
249    /// `SQLITE_GRAPHRAG_LLM_BACKEND`.
250    #[arg(long, global = true, value_enum, default_value_t = LlmBackendChoice::Auto, env = "SQLITE_GRAPHRAG_LLM_BACKEND")]
251    pub llm_backend: LlmBackendChoice,
252
253    /// v1.0.82 (GAP-003): model to invoke on the chosen backend.
254    /// Honors the env var `SQLITE_GRAPHRAG_LLM_MODEL`. The default depends
255    /// on the backend (codex: `gpt-5.5`; claude: `claude-sonnet-4-6`).
256    #[arg(
257        long,
258        global = true,
259        value_name = "MODEL",
260        env = "SQLITE_GRAPHRAG_LLM_MODEL"
261    )]
262    pub llm_model: Option<String>,
263
264    /// v1.0.82 (GAP-003): path to the `claude` binary (overrides
265    /// PATH detection). Honors the env var `SQLITE_GRAPHRAG_CLAUDE_BINARY`.
266    #[arg(
267        long,
268        global = true,
269        value_name = "PATH",
270        env = "SQLITE_GRAPHRAG_CLAUDE_BINARY"
271    )]
272    pub claude_binary: Option<std::path::PathBuf>,
273
274    /// v1.0.89 (GAP-1): path to the `codex` binary (overrides
275    /// PATH detection). Honors the env var `SQLITE_GRAPHRAG_CODEX_BINARY`.
276    #[arg(
277        long,
278        global = true,
279        value_name = "PATH",
280        env = "SQLITE_GRAPHRAG_CODEX_BINARY"
281    )]
282    pub codex_binary: Option<std::path::PathBuf>,
283
284    /// v1.0.90 (GAP-OPENCODE-001): path to the `opencode` binary (overrides
285    /// PATH detection). Honors the env var `SQLITE_GRAPHRAG_OPENCODE_BINARY`.
286    #[arg(
287        long,
288        global = true,
289        value_name = "PATH",
290        env = "SQLITE_GRAPHRAG_OPENCODE_BINARY"
291    )]
292    pub opencode_binary: Option<std::path::PathBuf>,
293
294    /// v1.0.82 (GAP-005): chain of LLM backends tried in order
295    /// when the primary fails. Default `codex,claude,none`. Honors
296    /// the env var `SQLITE_GRAPHRAG_LLM_FALLBACK`.
297    #[arg(
298        long,
299        global = true,
300        default_value = "codex,claude,none",
301        env = "SQLITE_GRAPHRAG_LLM_FALLBACK"
302    )]
303    pub llm_fallback: String,
304
305    /// v1.0.82 (GAP-005): persists with a NULL embedding when all
306    /// backends in the chain fail. The memory stays in `pending_embeddings`
307    /// for reprocessing via `embedding retry`. Honors the env var
308    /// `SQLITE_GRAPHRAG_SKIP_EMBEDDING_ON_FAILURE`.
309    #[arg(
310        long,
311        global = true,
312        default_value_t = false,
313        value_parser = clap::builder::BoolishValueParser::new(),
314        env = "SQLITE_GRAPHRAG_SKIP_EMBEDDING_ON_FAILURE"
315    )]
316    pub skip_embedding_on_failure: bool,
317
318    /// v1.0.82 (GAP-004): host-wide limit of concurrent LLM
319    /// subprocesses. Default derived from `ncpus`. Honors the env var
320    /// `SQLITE_GRAPHRAG_LLM_MAX_HOST_CONCURRENCY`.
321    #[arg(
322        long,
323        global = true,
324        value_name = "N",
325        env = "SQLITE_GRAPHRAG_LLM_MAX_HOST_CONCURRENCY"
326    )]
327    pub llm_max_host_concurrency: Option<u32>,
328
329    /// v1.0.82 (GAP-004): seconds to wait for a free LLM slot
330    /// before failing with exit 75. Default 30s. Honors the env var
331    /// `SQLITE_GRAPHRAG_LLM_SLOT_WAIT_SECS`.
332    #[arg(
333        long,
334        global = true,
335        value_name = "SECONDS",
336        env = "SQLITE_GRAPHRAG_LLM_SLOT_WAIT_SECS"
337    )]
338    pub llm_slot_wait_secs: Option<u64>,
339
340    /// v1.0.82 (GAP-004): if set, fails immediately (exit 75)
341    /// when no LLM slot is free. Honors the env var
342    /// `SQLITE_GRAPHRAG_LLM_SLOT_NO_WAIT`.
343    #[arg(
344        long,
345        global = true,
346        default_value_t = false,
347        value_parser = clap::builder::BoolishValueParser::new(),
348        env = "SQLITE_GRAPHRAG_LLM_SLOT_NO_WAIT"
349    )]
350    pub llm_slot_no_wait: bool,
351
352    /// v1.0.93: embedding backend selector. `auto` tries OpenRouter API if key
353    /// available, falls back to LLM subprocess. `openrouter` requires API key.
354    /// `llm` forces subprocess. Honra env var `SQLITE_GRAPHRAG_EMBEDDING_BACKEND`.
355    #[arg(long, global = true, value_enum, default_value_t = EmbeddingBackendChoice::Auto, env = "SQLITE_GRAPHRAG_EMBEDDING_BACKEND")]
356    pub embedding_backend: EmbeddingBackendChoice,
357
358    /// v1.0.93: embedding model for the OpenRouter API. Required when
359    /// `--embedding-backend openrouter`. Honors env var `SQLITE_GRAPHRAG_EMBEDDING_MODEL`.
360    #[arg(
361        long,
362        global = true,
363        value_name = "MODEL",
364        env = "SQLITE_GRAPHRAG_EMBEDDING_MODEL"
365    )]
366    pub embedding_model: Option<String>,
367
368    /// v1.0.93: OpenRouter API key (prefer env var or config.toml over CLI flag
369    /// to avoid shell history exposure). Honra env var `OPENROUTER_API_KEY`.
370    #[arg(
371        long,
372        global = true,
373        value_name = "KEY",
374        hide = true,
375        env = "OPENROUTER_API_KEY"
376    )]
377    pub openrouter_api_key: Option<String>,
378
379    #[command(subcommand)]
380    pub command: Option<Commands>,
381}
382
#[cfg(test)]
mod json_only_format_tests {
    use super::Cli;
    use clap::Parser;

    /// Parses `sqlite-graphrag <base_args...> --format <format>` through clap.
    fn parse_with_format(base_args: &[&str], format: &str) -> Result<Cli, clap::Error> {
        let mut argv = vec!["sqlite-graphrag"];
        argv.extend_from_slice(base_args);
        argv.push("--format");
        argv.push(format);
        Cli::try_parse_from(argv)
    }

    /// Asserts that the invocation parses with `--format json` and is rejected
    /// by the parser with `--format <rejected>`.
    fn assert_json_only(base_args: &[&str], rejected: &str) {
        assert!(parse_with_format(base_args, "json").is_ok());
        assert!(parse_with_format(base_args, rejected).is_err());
    }

    #[test]
    fn restore_accepts_only_format_json() {
        assert_json_only(&["restore", "--name", "mem", "--version", "1"], "text");
    }

    #[test]
    fn hybrid_search_accepts_only_format_json() {
        assert_json_only(&["hybrid-search", "query"], "markdown");
    }

    #[test]
    fn remember_recall_rename_vacuum_json_only() {
        assert_json_only(
            &[
                "remember",
                "--name",
                "mem",
                "--type",
                "project",
                "--description",
                "desc",
            ],
            "text",
        );

        assert_json_only(&["recall", "query"], "text");

        assert_json_only(
            &["rename", "--name", "old", "--new-name", "new"],
            "markdown",
        );

        assert_json_only(&["vacuum"], "text");
    }
}
501
502impl Cli {
503    /// Validates concurrency flags and returns a localised descriptive error if invalid.
504    ///
505    /// Requires that `crate::i18n::init()` has already been called (happens before this
506    /// function in the `main` flow). In English it emits EN messages; in Portuguese it emits PT.
507    pub fn validate_flags(&self) -> Result<(), String> {
508        if let Some(n) = self.max_concurrency {
509            if n == 0 {
510                return Err(match current() {
511                    Language::English => "--max-concurrency must be >= 1".to_string(),
512                    Language::Portuguese => "--max-concurrency deve ser >= 1".to_string(),
513                });
514            }
515            let teto = max_concurrency_ceiling();
516            if n > teto {
517                return Err(match current() {
518                    Language::English => format!(
519                        "--max-concurrency {n} exceeds the ceiling of {teto} (2×nCPUs) on this system"
520                    ),
521                    Language::Portuguese => format!(
522                        "--max-concurrency {n} excede o teto de {teto} (2×nCPUs) neste sistema"
523                    ),
524                });
525            }
526        }
527        Ok(())
528    }
529}
530
531impl Commands {
532    /// Returns true for subcommands that load the ONNX model locally.
533    pub fn is_embedding_heavy(&self) -> bool {
534        matches!(
535            self,
536            Self::Init(_)
537                | Self::Remember(_)
538                | Self::RememberBatch(_)
539                | Self::Recall(_)
540                | Self::HybridSearch(_)
541                | Self::DeepResearch(_)
542        )
543    }
544
545    pub fn uses_cli_slot(&self) -> bool {
546        true
547    }
548
549    /// Read-only / no-embedding subcommands that MUST run without an embedding
550    /// API key. `init` warms a best-effort smoke test internally and degrades to
551    /// `ok_no_embedding` when the backend is unreachable; the `enrich` queue
552    /// inspectors (`--status` / `--list-dead` / `--requeue-dead` /
553    /// `--prune-dead-orphans`) never embed and never call the LLM. The eager
554    /// OpenRouter key preflight in `main` must skip its hard-fail for these.
555    pub fn tolerates_missing_embedding_key(&self) -> bool {
556        match self {
557            Self::Init(_) => true,
558            Self::Enrich(args) => {
559                args.status || args.list_dead || args.requeue_dead || args.prune_dead_orphans
560            }
561            _ => false,
562        }
563    }
564}
565
566/// GAP-E2E-010 (v1.0.89): `codex-models` accepts `--json` as a no-op so
567/// agents that append `--json` to every subcommand never see clap errors.
568/// The handler in `main.rs` always emits JSON on stdout; this flag is
569/// accepted and ignored for parity with the rest of the CLI surface.
570#[derive(Debug, clap::Args)]
571pub struct CodexModelsArgs {
572    /// No-op; JSON is always emitted on stdout by `codex-models`.
573    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
574    pub json: bool,
575}
576
577#[derive(Subcommand)]
578pub enum Commands {
579    /// Initialize database and download embedding model
580    #[command(after_long_help = "EXAMPLES:\n  \
581        # Initialize in current directory (default behavior)\n  \
582        sqlite-graphrag init\n\n  \
583        # Initialize at a specific path\n  \
584        sqlite-graphrag init --db /path/to/graphrag.sqlite\n\n  \
585        # Initialize using SQLITE_GRAPHRAG_HOME env var\n  \
586        SQLITE_GRAPHRAG_HOME=/data sqlite-graphrag init\n\n\
587        NOTES:\n  \
588        - `init` is OPTIONAL: any subsequent CRUD command auto-initializes graphrag.sqlite if missing.\n  \
589        - As a side effect, `init` warms a smoke-test embedding via the LLM-only one-shot pipeline.")]
590    Init(init::InitArgs),
591    /// Save a memory with optional entity graph
592    #[command(after_long_help = "EXAMPLES:\n  \
593        # Inline body\n  \
594        sqlite-graphrag remember --name onboarding --type user --description \"intro\" --body \"hello\"\n\n  \
595        # Body from file\n  \
596        sqlite-graphrag remember --name doc1 --type document --description \"...\" --body-file ./README.md\n\n  \
597        # Body from stdin (pipe)\n  \
598        cat README.md | sqlite-graphrag remember --name doc1 --type document --description \"...\" --body-stdin\n\n  \
599        # Enable automatic URL extraction (URL-regex only since v1.0.79; GLiNER removed)\n  \
600        sqlite-graphrag remember --name rich --type note --description \"...\" --body \"...\" --enable-ner")]
601    Remember(remember::RememberArgs),
602    /// Batch-create memories from NDJSON stdin (one invocation, one slot)
603    #[command(after_long_help = "EXAMPLES:\n  \
604        # Batch create from NDJSON\n  \
605        cat memories.ndjson | sqlite-graphrag remember-batch --force-merge --json\n\n  \
606        # Atomic batch\n  \
607        cat memories.ndjson | sqlite-graphrag remember-batch --transaction --json")]
608    RememberBatch(remember_batch::RememberBatchArgs),
609    /// Bulk-ingest every file under a directory as separate memories (NDJSON output)
610    Ingest(ingest::IngestArgs),
611    /// Search memories semantically
612    #[command(after_long_help = "EXAMPLES:\n  \
613        # Top 10 semantic matches (default)\n  \
614        sqlite-graphrag recall \"agent memory\"\n\n  \
615        # Top 3 only\n  \
616        sqlite-graphrag recall \"agent memory\" -k 3\n\n  \
617        # Search across all namespaces\n  \
618        sqlite-graphrag recall \"agent memory\" --all-namespaces\n\n  \
619        # Disable graph traversal (vector-only)\n  \
620        sqlite-graphrag recall \"agent memory\" --no-graph")]
621    Recall(recall::RecallArgs),
622    /// Read a memory by exact name
623    Read(read::ReadArgs),
624    /// List memories with filters
625    List(list::ListArgs),
626    /// Soft-delete a memory
627    Forget(forget::ForgetArgs),
628    /// Permanently delete soft-deleted memories
629    Purge(purge::PurgeArgs),
630    /// Rename a memory preserving history
631    Rename(rename::RenameArgs),
632    /// Edit a memory's body or description
633    Edit(edit::EditArgs),
634    /// List all versions of a memory
635    History(history::HistoryArgs),
636    /// Restore a memory to a previous version
637    Restore(restore::RestoreArgs),
638    /// Search using hybrid vector + full-text search
639    #[command(after_long_help = "EXAMPLES:\n  \
640        # Hybrid search combining KNN + FTS5 BM25 with RRF\n  \
641        sqlite-graphrag hybrid-search \"agent memory architecture\"\n\n  \
642        # Custom weights for vector vs full-text components\n  \
643        sqlite-graphrag hybrid-search \"agent\" --weight-vec 0.7 --weight-fts 0.3")]
644    HybridSearch(hybrid_search::HybridSearchArgs),
645    /// Show database health
646    Health(health::HealthArgs),
647    /// Apply pending schema migrations
648    Migrate(migrate::MigrateArgs),
649    /// Resolve namespace precedence for the current invocation
650    NamespaceDetect(namespace_detect::NamespaceDetectArgs),
651    /// Run PRAGMA optimize on the database
652    Optimize(optimize::OptimizeArgs),
653    /// Show database statistics
654    Stats(stats::StatsArgs),
655    /// Create a checkpointed copy safe for file sync
656    SyncSafeCopy(sync_safe_copy::SyncSafeCopyArgs),
657    /// Back up the database using the SQLite Online Backup API
658    Backup(backup::BackupArgs),
659    /// Run VACUUM after checkpointing the WAL
660    Vacuum(vacuum::VacuumArgs),
661    /// Create an explicit relationship between two entities
662    Link(link::LinkArgs),
663    /// Remove a specific relationship between two entities
664    Unlink(unlink::UnlinkArgs),
665    /// Deep parallel multi-hop GraphRAG research
666    #[command(name = "deep-research")]
667    DeepResearch(deep_research::DeepResearchArgs),
668    /// List memories connected via the entity graph
669    Related(related::RelatedArgs),
670    /// Export a graph snapshot in json, dot or mermaid
671    Graph(graph_export::GraphArgs),
672    /// Export memories as NDJSON (one JSON line per memory, plus a summary line)
673    Export(export::ExportArgs),
674    /// FTS5 full-text search index management (rebuild or check)
675    Fts(fts::FtsArgs),
676    /// Vector index maintenance (orphan detection, purge, stats) — G39
677    Vec(vec::VecArgs),
678    /// List codex OAuth models accepted by ChatGPT Pro (G33).
679    ///
680    /// GAP-E2E-010 (v1.0.89): accepts `--json` as a no-op (JSON is always
681    /// emitted on stdout) so the flag never breaks agent pipelines that
682    /// append `--json` to every invocation.
683    #[command(name = "codex-models")]
684    CodexModels(CodexModelsArgs),
685    /// Bulk-delete all relationships of a given type (e.g. mentions)
686    PruneRelations(prune_relations::PruneRelationsArgs),
687    /// Remove NER bindings (memory_entities rows) for an entity or all entities
688    #[command(name = "prune-ner")]
689    PruneNer(prune_ner::PruneNerArgs),
690    /// Inspect and manage cross-process LLM slot semaphore (GAP-004, v1.0.82)
691    Slots(slots::SlotsArgs),
692    /// Inspect and manage the `remember` checkpoint queue (GAP-001, v1.0.82)
693    Pending(pending::PendingArgs),
694    /// Health and per-entry inspection of the pending-embeddings queue (GAP-005, v1.0.82)
695    Embedding(embedding::EmbeddingArgs),
696    /// Batch operations over the pending-embeddings queue (GAP-005, v1.0.82)
697    #[command(name = "pending-embeddings")]
698    PendingEmbeddings(pending_embeddings::PendingEmbeddingsArgs),
699    /// Remove entities that have no memories and no relationships
700    CleanupOrphans(cleanup_orphans::CleanupOrphansArgs),
701    /// List entities linked to a specific memory
702    MemoryEntities(memory_entities::MemoryEntitiesArgs),
703    /// Manage cached resources (embedding models, etc.)
704    Cache(cache::CacheArgs),
705    /// Delete an entity and all its relationships from the graph
706    #[command(name = "delete-entity")]
707    DeleteEntity(delete_entity::DeleteEntityArgs),
708    /// Reclassify one entity or a batch of entities to a new type
709    Reclassify(reclassify::ReclassifyArgs),
710    /// Rename an entity preserving all relationships and memory bindings
711    #[command(name = "rename-entity")]
712    RenameEntity(rename_entity::RenameEntityArgs),
713    /// Merge multiple source entities into a single target entity
714    #[command(name = "merge-entities")]
715    MergeEntities(merge_entities::MergeEntitiesArgs),
716    /// Enrich graph memories and entities using an LLM provider
717    Enrich(enrich::EnrichArgs),
718    /// Reclassify relationship types across the graph using rules or LLM judgment
719    #[command(name = "reclassify-relation")]
720    ReclassifyRelation(reclassify_relation::ReclassifyRelationArgs),
721    /// Normalize entity names (deduplicate, kebab-case, merge near-duplicates)
722    #[command(name = "normalize-entities")]
723    NormalizeEntities(normalize_entities::NormalizeEntitiesArgs),
724    /// Generate shell completions for Bash, Zsh, Fish, PowerShell, or Elvish
725    Completions(completions::CompletionsArgs),
726    #[command(name = "debug-schema", hide = true)]
727    DebugSchema(debug_schema::DebugSchemaArgs),
728    /// Manage API keys and diagnose provider configuration (v1.0.93)
729    Config(config_cmd::ConfigArgs),
730}
731// FIX-1 (v1.0.89): manual `Debug` impl so test panic messages that print
732// `{:?}` on a captured `Commands` variant compile without requiring every
733// contained subcommand arg struct to derive `Debug`. The Debug output is
734// only used in test assertions for diagnostic messages; we emit the variant
735// name only — arg payload is intentionally omitted.
736impl std::fmt::Debug for Commands {
737    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
738        let name = match self {
739            Self::Init(_) => "Init",
740            Self::Health(_) => "Health",
741            Self::Stats(_) => "Stats",
742            Self::List(_) => "List",
743            Self::Read(_) => "Read",
744            Self::Edit(_) => "Edit",
745            Self::Rename(_) => "Rename",
746            Self::Restore(_) => "Restore",
747            Self::History(_) => "History",
748            Self::Forget(_) => "Forget",
749            Self::Purge(_) => "Purge",
750            Self::Remember(_) => "Remember",
751            Self::RememberBatch(_) => "RememberBatch",
752            Self::Recall(_) => "Recall",
753            Self::HybridSearch(_) => "HybridSearch",
754            Self::Enrich(_) => "Enrich",
755            Self::Ingest(_) => "Ingest",
756            Self::Optimize(_) => "Optimize",
757            Self::Migrate(_) => "Migrate",
758            Self::SyncSafeCopy(_) => "SyncSafeCopy",
759            Self::Backup(_) => "Backup",
760            Self::Vacuum(_) => "Vacuum",
761            Self::Link(_) => "Link",
762            Self::Unlink(_) => "Unlink",
763            Self::DeepResearch(_) => "DeepResearch",
764            Self::Related(_) => "Related",
765            Self::Graph(_) => "Graph",
766            Self::Export(_) => "Export",
767            Self::Fts(_) => "Fts",
768            Self::Vec(_) => "Vec",
769            Self::CodexModels(_) => "CodexModels",
770            Self::PruneRelations(_) => "PruneRelations",
771            Self::PruneNer(_) => "PruneNer",
772            Self::Slots(_) => "Slots",
773            Self::Pending(_) => "Pending",
774            Self::Embedding(_) => "Embedding",
775            Self::PendingEmbeddings(_) => "PendingEmbeddings",
776            Self::CleanupOrphans(_) => "CleanupOrphans",
777            Self::MemoryEntities(_) => "MemoryEntities",
778            Self::Cache(_) => "Cache",
779            Self::DeleteEntity(_) => "DeleteEntity",
780            Self::Reclassify(_) => "Reclassify",
781            Self::RenameEntity(_) => "RenameEntity",
782            Self::ReclassifyRelation(_) => "ReclassifyRelation",
783            Self::NormalizeEntities(_) => "NormalizeEntities",
784            Self::MergeEntities(_) => "MergeEntities",
785            Self::NamespaceDetect(_) => "NamespaceDetect",
786            Self::Completions(_) => "Completions",
787            Self::DebugSchema(_) => "DebugSchema",
788            Self::Config(_) => "Config",
789        };
790        f.write_str(name)
791    }
792}
793
/// Category label for a memory, usable as a command-line value through the
/// derived `clap::ValueEnum` implementation.
///
/// `Document` is the variant returned by `MemoryType::default()`.
#[derive(Copy, Clone, Debug, Default, clap::ValueEnum)]
pub enum MemoryType {
    // NOTE(review): variant notes use `//` rather than `///` on purpose —
    // clap turns `///` comments on `ValueEnum` variants into help text, which
    // would change the CLI's `--help` output.
    User,
    Feedback,
    Project,
    Reference,
    Decision,
    Incident,
    Skill,
    // Selected by `#[default]`, i.e. the value of `MemoryType::default()`.
    #[default]
    Document,
    Note,
}
807
#[cfg(test)]
mod heavy_concurrency_tests {
    use super::*;

    /// Parses `argv` as a full command line, panicking with `label` when
    /// parsing fails, and reports whether the parsed subcommand answers
    /// `true` to `is_embedding_heavy()`. An absent subcommand yields `false`.
    fn parsed_is_heavy(argv: &[&str], label: &str) -> bool {
        let cli = Cli::try_parse_from(argv.iter().copied()).expect(label);
        match cli.command.as_ref() {
            Some(cmd) => cmd.is_embedding_heavy(),
            None => false,
        }
    }

    /// `init`, `remember`, `recall` and `hybrid-search` must all be flagged
    /// as embedding-heavy.
    #[test]
    fn command_heavy_detects_init_and_embeddings() {
        assert!(parsed_is_heavy(&["sqlite-graphrag", "init"], "parse init"));

        assert!(parsed_is_heavy(
            &[
                "sqlite-graphrag",
                "remember",
                "--name",
                "test-memory",
                "--type",
                "project",
                "--description",
                "desc",
            ],
            "parse remember",
        ));

        assert!(parsed_is_heavy(
            &["sqlite-graphrag", "recall", "query"],
            "parse recall",
        ));

        assert!(parsed_is_heavy(
            &["sqlite-graphrag", "hybrid-search", "query"],
            "parse hybrid",
        ));
    }

    /// `stats` must not be flagged as embedding-heavy.
    #[test]
    fn command_light_does_not_mark_stats() {
        assert!(!parsed_is_heavy(
            &["sqlite-graphrag", "stats"],
            "parse stats",
        ));
    }
}
860
861impl MemoryType {
862    pub fn as_str(&self) -> &'static str {
863        match self {
864            Self::User => "user",
865            Self::Feedback => "feedback",
866            Self::Project => "project",
867            Self::Reference => "reference",
868            Self::Decision => "decision",
869            Self::Incident => "incident",
870            Self::Skill => "skill",
871            Self::Document => "document",
872            Self::Note => "note",
873        }
874    }
875}
876
/// GAP-SG-31/33/34/35/30: parse-time contracts for the Fase G (phase G) clap fixes.
#[cfg(test)]
mod fase_g_parsing_tests {
    use super::Cli;
    use clap::Parser;

    /// Returns `true` when `argv` parses successfully as a complete command line.
    fn accepts(argv: &[&str]) -> bool {
        Cli::try_parse_from(argv.iter().copied()).is_ok()
    }

    /// GAP-SG-31(b): `enrich --status` must parse even though neither
    /// --operation nor --mode is supplied.
    #[test]
    fn enrich_status_optional_operation_and_mode() {
        assert!(
            accepts(&["sqlite-graphrag", "enrich", "--status"]),
            "--status alone must not require --operation/--mode"
        );
        assert!(
            accepts(&["sqlite-graphrag", "enrich", "--list-dead"]),
            "--list-dead is read-only and must not require --operation/--mode"
        );
        // The write path keeps both flags mandatory, so a bare `enrich` fails.
        assert!(
            !accepts(&["sqlite-graphrag", "enrich"]),
            "bare enrich (no status/list-dead/requeue-dead) must require --operation/--mode"
        );
        // A fully specified write invocation is still accepted.
        assert!(accepts(&[
            "sqlite-graphrag",
            "enrich",
            "--operation",
            "memory-bindings",
            "--mode",
            "openrouter",
        ]));
    }

    /// GAP-SG-34(c): `config doctor --json` must parse (the flag is accepted
    /// as a no-op); `config list-keys --json` is checked the same way.
    #[test]
    fn config_doctor_accepts_json() {
        assert!(accepts(&["sqlite-graphrag", "config", "doctor", "--json"]));
        assert!(accepts(&["sqlite-graphrag", "config", "list-keys", "--json"]));
    }

    /// GAP-SG-33(d): a --description value that begins with a hyphen is taken
    /// as the value instead of being parsed as a flag.
    #[test]
    fn remember_description_allows_leading_hyphen() {
        assert!(accepts(&[
            "sqlite-graphrag",
            "remember",
            "--name",
            "mem",
            "--type",
            "note",
            "--description",
            "- bullet description",
        ]));
    }

    /// GAP-SG-35(e): `remember-batch --llm-parallelism N` must parse.
    #[test]
    fn remember_batch_accepts_llm_parallelism() {
        assert!(accepts(&[
            "sqlite-graphrag",
            "remember-batch",
            "--llm-parallelism",
            "4",
        ]));
    }

    /// GAP-SG-30: --graph-file may accompany a body source, but it conflicts
    /// with the other graph-input flags.
    #[test]
    fn remember_graph_file_combines_with_body_but_conflicts_with_graph_stdin() {
        let with_inline_body = [
            "sqlite-graphrag",
            "remember",
            "--name",
            "mem",
            "--type",
            "note",
            "--body",
            "inline body",
            "--graph-file",
            "/tmp/graph.json",
        ];
        assert!(
            accepts(&with_inline_body),
            "--body + --graph-file must coexist"
        );

        let with_graph_stdin = [
            "sqlite-graphrag",
            "remember",
            "--name",
            "mem",
            "--type",
            "note",
            "--graph-file",
            "/tmp/graph.json",
            "--graph-stdin",
        ];
        assert!(
            !accepts(&with_graph_stdin),
            "--graph-file conflicts with --graph-stdin"
        );
    }
}