Skip to main content

sqlite_graphrag/
cli.rs

1//! CLI argument structs and command surface (clap-based).
2//!
3//! Defines `Cli` and all subcommand enums; contains no business logic.
4
5use crate::commands::*;
6use crate::i18n::{current, Language};
7use clap::{Parser, Subcommand};
8
/// Computes the upper bound used when validating `--max-concurrency`.
///
/// The bound is twice the value reported by
/// [`std::thread::available_parallelism`]. When that query fails, a fixed
/// fallback of 8 is returned instead.
fn max_concurrency_ceiling() -> usize {
    match std::thread::available_parallelism() {
        Ok(cpus) => cpus.get() * 2,
        Err(_) => 8,
    }
}
15
/// Output format selector for graph export, parseable from a command-line
/// value through the derived `clap::ValueEnum` implementation.
// NOTE: with `clap::ValueEnum`, a `///` doc comment on a variant becomes the
// help text of the corresponding possible value. Maintainer notes on variants
// should therefore use plain `//` comments unless a help-text change is
// intended.
#[derive(Copy, Clone, Debug, PartialEq, Eq, clap::ValueEnum)]
pub enum GraphExportFormat {
    Json,
    Dot,
    Mermaid,
    /// Stream one JSON object per entity, then one per edge, then a summary line.
    Ndjson,
}
24
// Top-level parsed command line for the `sqlite-graphrag` binary: a set of
// global flags plus the selected subcommand.
//
// NOTE: clap's derive turns `///` doc comments on this struct and its fields
// into help output, so the maintainer notes below deliberately use plain `//`
// comments to leave the generated help text untouched.
#[derive(Parser)]
#[command(name = "sqlite-graphrag")]
#[command(version)]
#[command(about = "Local GraphRAG memory for LLMs in a single SQLite file")]
#[command(arg_required_else_help = true)]
pub struct Cli {
    /// Maximum number of simultaneous CLI invocations allowed (default: 4).
    ///
    /// Caps the counting semaphore used for CLI concurrency slots. The value must
    /// stay within [1, 2×nCPUs]. Values above the ceiling are rejected with exit 2.
    // No default is set on the attribute, so an omitted flag parses as `None`;
    // the documented default of 4 is presumably applied by the caller — TODO
    // confirm. The [1, 2×nCPUs] range is checked by `Cli::validate_flags`, not
    // by clap itself.
    #[arg(long, global = true, value_name = "N")]
    pub max_concurrency: Option<usize>,

    /// Wait up to SECONDS for a free concurrency slot before giving up (exit 75).
    ///
    /// Useful in retrying agent pipelines: the process polls every 500 ms until a
    /// slot opens or the timeout expires. Default: 300s (5 minutes).
    // Parsed as `None` when omitted; the 300s default and the polling loop are
    // not implemented in this struct (presumably in the slot-acquisition code).
    #[arg(long, global = true, value_name = "SECONDS")]
    pub wait_lock: Option<u64>,

    /// Skip the available-memory check before loading the model.
    ///
    /// Exclusive use in automated tests where real allocation does not occur.
    // `hide = true` keeps this flag out of the generated help output.
    #[arg(long, global = true, hide = true, default_value_t = false)]
    pub skip_memory_guard: bool,

    /// Language for human-facing stderr messages. Accepts `en` or `pt`.
    ///
    /// Without the flag, detection falls back to `SQLITE_GRAPHRAG_LANG` and then
    /// `LC_ALL`/`LANG`. JSON stdout stays deterministic and identical across
    /// languages; only human-facing strings are affected.
    // The fallback chain described above is not implemented here; this field
    // only records an explicit `--lang` value.
    #[arg(long, global = true, value_enum, value_name = "LANG")]
    pub lang: Option<crate::i18n::Language>,

    /// Time zone for `*_iso` fields in JSON output (for example `America/Sao_Paulo`).
    ///
    /// Accepts any IANA time zone name. Without the flag, it falls back to
    /// `SQLITE_GRAPHRAG_DISPLAY_TZ`; if unset, UTC is used. Integer epoch fields
    /// are not affected.
    // Parsed directly into `chrono_tz::Tz`, so an unknown zone name is rejected
    // at argument-parsing time.
    #[arg(long, global = true, value_name = "IANA")]
    pub tz: Option<chrono_tz::Tz>,

    /// Increase logging verbosity (-v=info, -vv=debug, -vvv=trace).
    ///
    /// Overrides `SQLITE_GRAPHRAG_LOG_LEVEL` env var when present. Logs are emitted
    /// to stderr; JSON stdout is unaffected.
    // `ArgAction::Count` stores how many times the flag was given.
    #[arg(short = 'v', long, global = true, action = clap::ArgAction::Count)]
    pub verbose: u8,

    /// v1.0.75 (G21 solution): extraction backend selector. Accepts
    /// `llm` (default), `embedding` (legacy), `none`, or `both` (composite).
    /// The `llm` backend invokes claude code / codex CLI headless to extract
    /// entities and relationships; `embedding` uses the legacy fastembed
    /// pipeline (requires the `embedding-legacy` feature at compile time).
    // Stored as a free-form string: nothing on this attribute restricts the
    // value to the kinds listed above, so validation must happen downstream
    // (not visible here). Because `default_value` is set, the field is expected
    // to hold `Some("llm")` when the flag is omitted.
    #[arg(long, global = true, value_name = "KIND", default_value = "llm")]
    pub extraction_backend: Option<String>,

    // The subcommand to run; see `Commands`.
    #[command(subcommand)]
    pub command: Commands,
}
85
#[cfg(test)]
mod json_only_format_tests {
    use super::Cli;
    use clap::Parser;

    /// Parses `sqlite-graphrag <subcommand_args...> --format <format>` and
    /// reports whether clap accepted the command line.
    fn format_accepted(subcommand_args: &[&str], format: &str) -> bool {
        let mut argv = vec!["sqlite-graphrag"];
        argv.extend_from_slice(subcommand_args);
        argv.extend_from_slice(&["--format", format]);
        Cli::try_parse_from(argv).is_ok()
    }

    #[test]
    fn restore_accepts_only_format_json() {
        let restore = ["restore", "--name", "mem", "--version", "1"];

        assert!(format_accepted(&restore, "json"));
        assert!(!format_accepted(&restore, "text"));
    }

    #[test]
    fn hybrid_search_accepts_only_format_json() {
        let hybrid = ["hybrid-search", "query"];

        assert!(format_accepted(&hybrid, "json"));
        assert!(!format_accepted(&hybrid, "markdown"));
    }

    #[test]
    fn remember_recall_rename_vacuum_json_only() {
        // remember
        let remember = [
            "remember",
            "--name",
            "mem",
            "--type",
            "project",
            "--description",
            "desc",
        ];
        assert!(format_accepted(&remember, "json"));
        assert!(!format_accepted(&remember, "text"));

        // recall
        let recall = ["recall", "query"];
        assert!(format_accepted(&recall, "json"));
        assert!(!format_accepted(&recall, "text"));

        // rename
        let rename = ["rename", "--name", "old", "--new-name", "new"];
        assert!(format_accepted(&rename, "json"));
        assert!(!format_accepted(&rename, "markdown"));

        // vacuum
        let vacuum = ["vacuum"];
        assert!(format_accepted(&vacuum, "json"));
        assert!(!format_accepted(&vacuum, "text"));
    }
}
204
205impl Cli {
206    /// Validates concurrency flags and returns a localised descriptive error if invalid.
207    ///
208    /// Requires that `crate::i18n::init()` has already been called (happens before this
209    /// function in the `main` flow). In English it emits EN messages; in Portuguese it emits PT.
210    pub fn validate_flags(&self) -> Result<(), String> {
211        if let Some(n) = self.max_concurrency {
212            if n == 0 {
213                return Err(match current() {
214                    Language::English => "--max-concurrency must be >= 1".to_string(),
215                    Language::Portuguese => "--max-concurrency deve ser >= 1".to_string(),
216                });
217            }
218            let teto = max_concurrency_ceiling();
219            if n > teto {
220                return Err(match current() {
221                    Language::English => format!(
222                        "--max-concurrency {n} exceeds the ceiling of {teto} (2×nCPUs) on this system"
223                    ),
224                    Language::Portuguese => format!(
225                        "--max-concurrency {n} excede o teto de {teto} (2×nCPUs) neste sistema"
226                    ),
227                });
228            }
229        }
230        Ok(())
231    }
232}
233
234impl Commands {
235    /// Returns true for subcommands that load the ONNX model locally.
236    pub fn is_embedding_heavy(&self) -> bool {
237        matches!(
238            self,
239            Self::Init(_)
240                | Self::Remember(_)
241                | Self::RememberBatch(_)
242                | Self::Recall(_)
243                | Self::HybridSearch(_)
244                | Self::DeepResearch(_)
245        )
246    }
247
248    pub fn uses_cli_slot(&self) -> bool {
249        true
250    }
251}
252
// The full set of `sqlite-graphrag` subcommands. Each variant (except
// `CodexModels`, which takes no arguments) wraps the argument struct defined
// in the matching `crate::commands` module.
//
// NOTE: clap's derive turns the `///` doc comments below into each
// subcommand's help text, so maintainer notes in this enum use plain `//`
// comments to leave the generated help untouched.
//
// In the `after_long_help` literals, a trailing `\` skips the line break and
// the next line's leading whitespace; the two spaces written before each `\`
// are what indent the following line in the rendered help.
#[derive(Subcommand)]
pub enum Commands {
    /// Initialize database and download embedding model
    #[command(after_long_help = "EXAMPLES:\n  \
        # Initialize in current directory (default behavior)\n  \
        sqlite-graphrag init\n\n  \
        # Initialize at a specific path\n  \
        sqlite-graphrag init --db /path/to/graphrag.sqlite\n\n  \
        # Initialize using SQLITE_GRAPHRAG_HOME env var\n  \
        SQLITE_GRAPHRAG_HOME=/data sqlite-graphrag init\n\n\
        NOTES:\n  \
        - `init` is OPTIONAL: any subsequent CRUD command auto-initializes graphrag.sqlite if missing.\n  \
        - As a side effect, `init` warms a smoke-test embedding via the LLM-only one-shot pipeline.")]
    Init(init::InitArgs),
    /// Save a memory with optional entity graph
    #[command(after_long_help = "EXAMPLES:\n  \
        # Inline body\n  \
        sqlite-graphrag remember --name onboarding --type user --description \"intro\" --body \"hello\"\n\n  \
        # Body from file\n  \
        sqlite-graphrag remember --name doc1 --type document --description \"...\" --body-file ./README.md\n\n  \
        # Body from stdin (pipe)\n  \
        cat README.md | sqlite-graphrag remember --name doc1 --type document --description \"...\" --body-stdin\n\n  \
        # Enable GLiNER entity extraction (disabled by default)\n  \
        sqlite-graphrag remember --name rich --type note --description \"...\" --body \"...\" --enable-ner")]
    Remember(remember::RememberArgs),
    /// Batch-create memories from NDJSON stdin (one invocation, one slot)
    #[command(after_long_help = "EXAMPLES:\n  \
        # Batch create from NDJSON\n  \
        cat memories.ndjson | sqlite-graphrag remember-batch --force-merge --json\n\n  \
        # Atomic batch\n  \
        cat memories.ndjson | sqlite-graphrag remember-batch --transaction --json")]
    RememberBatch(remember_batch::RememberBatchArgs),
    /// Bulk-ingest every file under a directory as separate memories (NDJSON output)
    Ingest(ingest::IngestArgs),
    /// Search memories semantically
    #[command(after_long_help = "EXAMPLES:\n  \
        # Top 10 semantic matches (default)\n  \
        sqlite-graphrag recall \"agent memory\"\n\n  \
        # Top 3 only\n  \
        sqlite-graphrag recall \"agent memory\" -k 3\n\n  \
        # Search across all namespaces\n  \
        sqlite-graphrag recall \"agent memory\" --all-namespaces\n\n  \
        # Disable graph traversal (vector-only)\n  \
        sqlite-graphrag recall \"agent memory\" --no-graph")]
    Recall(recall::RecallArgs),
    /// Read a memory by exact name
    Read(read::ReadArgs),
    /// List memories with filters
    List(list::ListArgs),
    /// Soft-delete a memory
    Forget(forget::ForgetArgs),
    /// Permanently delete soft-deleted memories
    Purge(purge::PurgeArgs),
    /// Rename a memory preserving history
    Rename(rename::RenameArgs),
    /// Edit a memory's body or description
    Edit(edit::EditArgs),
    /// List all versions of a memory
    History(history::HistoryArgs),
    /// Restore a memory to a previous version
    Restore(restore::RestoreArgs),
    /// Search using hybrid vector + full-text search
    #[command(after_long_help = "EXAMPLES:\n  \
        # Hybrid search combining KNN + FTS5 BM25 with RRF\n  \
        sqlite-graphrag hybrid-search \"agent memory architecture\"\n\n  \
        # Custom weights for vector vs full-text components\n  \
        sqlite-graphrag hybrid-search \"agent\" --weight-vec 0.7 --weight-fts 0.3")]
    HybridSearch(hybrid_search::HybridSearchArgs),
    /// Show database health
    Health(health::HealthArgs),
    /// Apply pending schema migrations
    Migrate(migrate::MigrateArgs),
    /// Resolve namespace precedence for the current invocation
    NamespaceDetect(namespace_detect::NamespaceDetectArgs),
    /// Run PRAGMA optimize on the database
    Optimize(optimize::OptimizeArgs),
    /// Show database statistics
    Stats(stats::StatsArgs),
    /// Create a checkpointed copy safe for file sync
    SyncSafeCopy(sync_safe_copy::SyncSafeCopyArgs),
    /// Back up the database using the SQLite Online Backup API
    Backup(backup::BackupArgs),
    /// Run VACUUM after checkpointing the WAL
    Vacuum(vacuum::VacuumArgs),
    /// Create an explicit relationship between two entities
    Link(link::LinkArgs),
    /// Remove a specific relationship between two entities
    Unlink(unlink::UnlinkArgs),
    /// Deep parallel multi-hop GraphRAG research
    #[command(name = "deep-research")]
    DeepResearch(deep_research::DeepResearchArgs),
    /// List memories connected via the entity graph
    Related(related::RelatedArgs),
    // NOTE(review): the help text below lists json, dot and mermaid only, while
    // `GraphExportFormat` in this file also has an `Ndjson` variant. If
    // `GraphArgs` uses that enum, this line is stale — confirm and update.
    /// Export a graph snapshot in json, dot or mermaid
    Graph(graph_export::GraphArgs),
    /// Export memories as NDJSON (one JSON line per memory, plus a summary line)
    Export(export::ExportArgs),
    /// FTS5 full-text search index management (rebuild or check)
    Fts(fts::FtsArgs),
    /// Vector index maintenance (orphan detection, purge, stats) — G39
    Vec(vec::VecArgs),
    /// List codex OAuth models accepted by ChatGPT Pro (G33).
    #[command(name = "codex-models")]
    CodexModels,
    /// Bulk-delete all relationships of a given type (e.g. mentions)
    PruneRelations(prune_relations::PruneRelationsArgs),
    /// Remove NER bindings (memory_entities rows) for an entity or all entities
    #[command(name = "prune-ner")]
    PruneNer(prune_ner::PruneNerArgs),
    /// Remove entities that have no memories and no relationships
    CleanupOrphans(cleanup_orphans::CleanupOrphansArgs),
    /// List entities linked to a specific memory
    MemoryEntities(memory_entities::MemoryEntitiesArgs),
    /// Manage cached resources (embedding models, etc.)
    Cache(cache::CacheArgs),
    /// Delete an entity and all its relationships from the graph
    #[command(name = "delete-entity")]
    DeleteEntity(delete_entity::DeleteEntityArgs),
    /// Reclassify one entity or a batch of entities to a new type
    Reclassify(reclassify::ReclassifyArgs),
    /// Rename an entity preserving all relationships and memory bindings
    #[command(name = "rename-entity")]
    RenameEntity(rename_entity::RenameEntityArgs),
    /// Merge multiple source entities into a single target entity
    #[command(name = "merge-entities")]
    MergeEntities(merge_entities::MergeEntitiesArgs),
    /// Enrich graph memories and entities using an LLM provider
    Enrich(enrich::EnrichArgs),
    /// Reclassify relationship types across the graph using rules or LLM judgment
    #[command(name = "reclassify-relation")]
    ReclassifyRelation(reclassify_relation::ReclassifyRelationArgs),
    /// Normalize entity names (deduplicate, kebab-case, merge near-duplicates)
    #[command(name = "normalize-entities")]
    NormalizeEntities(normalize_entities::NormalizeEntitiesArgs),
    /// Generate shell completions for Bash, Zsh, Fish, PowerShell, or Elvish
    Completions(completions::CompletionsArgs),
    // Hidden from help (`hide = true`) and intentionally left without a `///`
    // description.
    #[command(name = "debug-schema", hide = true)]
    DebugSchema(debug_schema::DebugSchemaArgs),
}
392
/// Category assigned to a memory, parseable from a command-line value through
/// the derived `clap::ValueEnum` implementation.
///
/// `MemoryType::as_str` maps each variant to its lowercase string form.
// NOTE: with `clap::ValueEnum`, a `///` doc comment on a variant becomes the
// help text of the corresponding possible value, so notes on variants here use
// plain `//` comments.
#[derive(Copy, Clone, Debug, Default, clap::ValueEnum)]
pub enum MemoryType {
    User,
    Feedback,
    Project,
    Reference,
    Decision,
    Incident,
    Skill,
    // `#[default]` makes `Document` the value returned by `MemoryType::default()`.
    #[default]
    Document,
    Note,
}
406
#[cfg(test)]
mod heavy_concurrency_tests {
    use super::*;

    /// Parses `argv` (binary name included) and returns what
    /// `is_embedding_heavy` reports for the selected subcommand.
    ///
    /// Panics with `context` as the message if the command line is rejected.
    fn is_heavy(argv: &[&str], context: &str) -> bool {
        let cli = Cli::try_parse_from(argv.iter().copied()).expect(context);
        cli.command.is_embedding_heavy()
    }

    #[test]
    fn command_heavy_detects_init_and_embeddings() {
        assert!(is_heavy(&["sqlite-graphrag", "init"], "parse init"));

        assert!(is_heavy(
            &[
                "sqlite-graphrag",
                "remember",
                "--name",
                "test-memory",
                "--type",
                "project",
                "--description",
                "desc",
            ],
            "parse remember",
        ));

        assert!(is_heavy(
            &["sqlite-graphrag", "recall", "query"],
            "parse recall",
        ));

        assert!(is_heavy(
            &["sqlite-graphrag", "hybrid-search", "query"],
            "parse hybrid",
        ));
    }

    #[test]
    fn command_light_does_not_mark_stats() {
        assert!(!is_heavy(&["sqlite-graphrag", "stats"], "parse stats"));
    }
}
444
445impl MemoryType {
446    pub fn as_str(&self) -> &'static str {
447        match self {
448            Self::User => "user",
449            Self::Feedback => "feedback",
450            Self::Project => "project",
451            Self::Reference => "reference",
452            Self::Decision => "decision",
453            Self::Incident => "incident",
454            Self::Skill => "skill",
455            Self::Document => "document",
456            Self::Note => "note",
457        }
458    }
459}