Skip to main content

sqlite_graphrag/
cli.rs

//! CLI argument structs and command surface (clap-based).
//!
//! Defines `Cli` and all subcommand enums; contains no business logic.
4
5use crate::commands::*;
6use crate::i18n::{current, Language};
7use clap::{Parser, Subcommand};
8
/// Upper bound accepted for `--max-concurrency` on this machine.
///
/// The bound is twice the parallelism reported by
/// `std::thread::available_parallelism`; when that query fails, a fixed
/// fallback of 8 is used instead.
fn max_concurrency_ceiling() -> usize {
    match std::thread::available_parallelism() {
        Ok(cores) => cores.get() * 2,
        Err(_) => 8,
    }
}
15
// Output formats for graph export, parsed from the command line through the
// `clap::ValueEnum` derive.
//
// NOTE(review): this enum is not referenced elsewhere in this file; it is
// presumably consumed by the `graph` subcommand's arguments — verify.
// Maintainer notes use `//` on purpose: `///` text on a `ValueEnum` variant is
// picked up by clap as user-visible help for that value.
#[derive(Copy, Clone, Debug, PartialEq, Eq, clap::ValueEnum)]
pub enum GraphExportFormat {
    Json,
    Dot,
    Mermaid,
    /// Stream one JSON object per entity, then one per edge, then a summary line.
    Ndjson,
}
24
// Root clap parser. Every flag below is declared `global = true`, so it is
// accepted alongside any subcommand; `command` holds the selected subcommand.
//
// Maintainer notes in this struct use `//` on purpose: the `///` comments are
// consumed by clap's derive as user-visible help text.
#[derive(Parser)]
#[command(name = "sqlite-graphrag")]
#[command(version)]
#[command(about = "Local GraphRAG memory for LLMs in a single SQLite file")]
#[command(arg_required_else_help = true)]
pub struct Cli {
    // No clap-level range check here: the [1, ceiling] bound is enforced after
    // parsing by `Cli::validate_flags`.
    /// Maximum number of simultaneous CLI invocations allowed (default: 4).
    ///
    /// Caps the counting semaphore used for CLI concurrency slots. The value must
    /// stay within [1, 2×nCPUs]. Values above the ceiling are rejected with exit 2.
    #[arg(long, global = true, value_name = "N")]
    pub max_concurrency: Option<usize>,

    // No `default_value` is declared, so this stays `None` when the flag is
    // absent; the documented 300 s default must be applied by the consumer.
    /// Wait up to SECONDS for a free concurrency slot before giving up (exit 75).
    ///
    /// Useful in retrying agent pipelines: the process polls every 500 ms until a
    /// slot opens or the timeout expires. Default: 300s (5 minutes).
    #[arg(long, global = true, value_name = "SECONDS")]
    pub wait_lock: Option<u64>,

    // `hide = true` keeps this flag out of the generated help output.
    /// Skip the available-memory check before loading the model.
    ///
    /// Exclusive use in automated tests where real allocation does not occur.
    #[arg(long, global = true, hide = true, default_value_t = false)]
    pub skip_memory_guard: bool,

    /// Language for human-facing stderr messages. Accepts `en` or `pt`.
    ///
    /// Without the flag, detection falls back to `SQLITE_GRAPHRAG_LANG` and then
    /// `LC_ALL`/`LANG`. JSON stdout stays deterministic and identical across
    /// languages; only human-facing strings are affected.
    #[arg(long, global = true, value_enum, value_name = "LANG")]
    pub lang: Option<crate::i18n::Language>,

    /// Time zone for `*_iso` fields in JSON output (for example `America/Sao_Paulo`).
    ///
    /// Accepts any IANA time zone name. Without the flag, it falls back to
    /// `SQLITE_GRAPHRAG_DISPLAY_TZ`; if unset, UTC is used. Integer epoch fields
    /// are not affected.
    #[arg(long, global = true, value_name = "IANA")]
    pub tz: Option<chrono_tz::Tz>,

    // `ArgAction::Count` makes the value the number of times `-v` was given.
    /// Increase logging verbosity (-v=info, -vv=debug, -vvv=trace).
    ///
    /// Overrides `SQLITE_GRAPHRAG_LOG_LEVEL` env var when present. Logs are emitted
    /// to stderr; JSON stdout is unaffected.
    #[arg(short = 'v', long, global = true, action = clap::ArgAction::Count)]
    pub verbose: u8,

    // NOTE(review): a `default_value` is declared, so this `Option` is expected
    // to be `Some` after a successful parse. The field is a plain `String` with
    // no value parser, so the accepted kinds are not validated by clap here —
    // confirm where they are checked.
    /// v1.0.75 (G21 solution): extraction backend selector. Accepts
    /// `llm` (default), `embedding` (legacy), `none`, or `both` (composite).
    /// The `llm` backend invokes claude code / codex CLI headless to extract
    /// entities and relationships; `embedding` is a permanent stub since
    /// v1.0.79 (legacy fastembed pipeline removed) that returns a clear
    /// migration error.
    #[arg(long, global = true, value_name = "KIND", default_value = "llm")]
    pub extraction_backend: Option<String>,

    // The [8, 4096] range is enforced at parse time by the clap value parser.
    /// v1.0.79 (G42/S1): embedding dimensionality override (default 64).
    ///
    /// Precedence: this flag > `SQLITE_GRAPHRAG_EMBEDDING_DIM` env var >
    /// the `dim` recorded in the database `schema_meta` > 64. Existing
    /// databases keep their recorded dimensionality automatically; use
    /// this flag only to migrate a corpus to a new dimensionality
    /// (followed by `enrich --operation re-embed`). Range: [8, 4096].
    #[arg(long, global = true, value_name = "N", value_parser = clap::value_parser!(u64).range(8..=4096))]
    pub embedding_dim: Option<u64>,

    // The subcommand to run; see the `Commands` enum.
    #[command(subcommand)]
    pub command: Commands,
}
96
#[cfg(test)]
mod json_only_format_tests {
    use super::Cli;
    use clap::Parser;

    /// Parses `prefix` followed by `--format <format>` and reports whether the
    /// command line was accepted.
    fn accepts_format(prefix: &[&str], format: &str) -> bool {
        let mut argv: Vec<&str> = prefix.to_vec();
        argv.push("--format");
        argv.push(format);
        Cli::try_parse_from(argv).is_ok()
    }

    /// `restore` takes `--format json` and rejects `--format text`.
    #[test]
    fn restore_accepts_only_format_json() {
        let restore = ["sqlite-graphrag", "restore", "--name", "mem", "--version", "1"];

        assert!(accepts_format(&restore, "json"));
        assert!(!accepts_format(&restore, "text"));
    }

    /// `hybrid-search` takes `--format json` and rejects `--format markdown`.
    #[test]
    fn hybrid_search_accepts_only_format_json() {
        let hybrid = ["sqlite-graphrag", "hybrid-search", "query"];

        assert!(accepts_format(&hybrid, "json"));
        assert!(!accepts_format(&hybrid, "markdown"));
    }

    /// `remember`, `recall`, `rename` and `vacuum` each take `--format json`
    /// and reject a non-JSON format.
    #[test]
    fn remember_recall_rename_vacuum_json_only() {
        let remember = [
            "sqlite-graphrag",
            "remember",
            "--name",
            "mem",
            "--type",
            "project",
            "--description",
            "desc",
        ];
        assert!(accepts_format(&remember, "json"));
        assert!(!accepts_format(&remember, "text"));

        let recall = ["sqlite-graphrag", "recall", "query"];
        assert!(accepts_format(&recall, "json"));
        assert!(!accepts_format(&recall, "text"));

        let rename = [
            "sqlite-graphrag",
            "rename",
            "--name",
            "old",
            "--new-name",
            "new",
        ];
        assert!(accepts_format(&rename, "json"));
        assert!(!accepts_format(&rename, "markdown"));

        let vacuum = ["sqlite-graphrag", "vacuum"];
        assert!(accepts_format(&vacuum, "json"));
        assert!(!accepts_format(&vacuum, "text"));
    }
}
215
216impl Cli {
217    /// Validates concurrency flags and returns a localised descriptive error if invalid.
218    ///
219    /// Requires that `crate::i18n::init()` has already been called (happens before this
220    /// function in the `main` flow). In English it emits EN messages; in Portuguese it emits PT.
221    pub fn validate_flags(&self) -> Result<(), String> {
222        if let Some(n) = self.max_concurrency {
223            if n == 0 {
224                return Err(match current() {
225                    Language::English => "--max-concurrency must be >= 1".to_string(),
226                    Language::Portuguese => "--max-concurrency deve ser >= 1".to_string(),
227                });
228            }
229            let teto = max_concurrency_ceiling();
230            if n > teto {
231                return Err(match current() {
232                    Language::English => format!(
233                        "--max-concurrency {n} exceeds the ceiling of {teto} (2×nCPUs) on this system"
234                    ),
235                    Language::Portuguese => format!(
236                        "--max-concurrency {n} excede o teto de {teto} (2×nCPUs) neste sistema"
237                    ),
238                });
239            }
240        }
241        Ok(())
242    }
243}
244
245impl Commands {
246    /// Returns true for subcommands that load the ONNX model locally.
247    pub fn is_embedding_heavy(&self) -> bool {
248        matches!(
249            self,
250            Self::Init(_)
251                | Self::Remember(_)
252                | Self::RememberBatch(_)
253                | Self::Recall(_)
254                | Self::HybridSearch(_)
255                | Self::DeepResearch(_)
256        )
257    }
258
259    pub fn uses_cli_slot(&self) -> bool {
260        true
261    }
262}
263
// Subcommand surface of the CLI (clap `Subcommand` derive). Each variant wraps
// the argument struct of the matching module pulled in by
// `use crate::commands::*`, except `CodexModels`, which takes no arguments.
//
// Maintainer notes in this enum use `//` on purpose: the `///` comments and the
// `after_long_help` strings are consumed by clap as user-visible help text, so
// editing them changes program output.
//
// Variants with an explicit `#[command(name = "...")]` pin their command-line
// name; the others rely on clap's derived name (the tests in this file invoke
// `HybridSearch` as `hybrid-search`).
#[derive(Subcommand)]
pub enum Commands {
    /// Initialize database and download embedding model
    #[command(after_long_help = "EXAMPLES:\n  \
        # Initialize in current directory (default behavior)\n  \
        sqlite-graphrag init\n\n  \
        # Initialize at a specific path\n  \
        sqlite-graphrag init --db /path/to/graphrag.sqlite\n\n  \
        # Initialize using SQLITE_GRAPHRAG_HOME env var\n  \
        SQLITE_GRAPHRAG_HOME=/data sqlite-graphrag init\n\n\
        NOTES:\n  \
        - `init` is OPTIONAL: any subsequent CRUD command auto-initializes graphrag.sqlite if missing.\n  \
        - As a side effect, `init` warms a smoke-test embedding via the LLM-only one-shot pipeline.")]
    Init(init::InitArgs),
    /// Save a memory with optional entity graph
    #[command(after_long_help = "EXAMPLES:\n  \
        # Inline body\n  \
        sqlite-graphrag remember --name onboarding --type user --description \"intro\" --body \"hello\"\n\n  \
        # Body from file\n  \
        sqlite-graphrag remember --name doc1 --type document --description \"...\" --body-file ./README.md\n\n  \
        # Body from stdin (pipe)\n  \
        cat README.md | sqlite-graphrag remember --name doc1 --type document --description \"...\" --body-stdin\n\n  \
        # Enable automatic URL extraction (URL-regex only since v1.0.79; GLiNER removed)\n  \
        sqlite-graphrag remember --name rich --type note --description \"...\" --body \"...\" --enable-ner")]
    Remember(remember::RememberArgs),
    /// Batch-create memories from NDJSON stdin (one invocation, one slot)
    #[command(after_long_help = "EXAMPLES:\n  \
        # Batch create from NDJSON\n  \
        cat memories.ndjson | sqlite-graphrag remember-batch --force-merge --json\n\n  \
        # Atomic batch\n  \
        cat memories.ndjson | sqlite-graphrag remember-batch --transaction --json")]
    RememberBatch(remember_batch::RememberBatchArgs),
    /// Bulk-ingest every file under a directory as separate memories (NDJSON output)
    Ingest(ingest::IngestArgs),
    /// Search memories semantically
    #[command(after_long_help = "EXAMPLES:\n  \
        # Top 10 semantic matches (default)\n  \
        sqlite-graphrag recall \"agent memory\"\n\n  \
        # Top 3 only\n  \
        sqlite-graphrag recall \"agent memory\" -k 3\n\n  \
        # Search across all namespaces\n  \
        sqlite-graphrag recall \"agent memory\" --all-namespaces\n\n  \
        # Disable graph traversal (vector-only)\n  \
        sqlite-graphrag recall \"agent memory\" --no-graph")]
    Recall(recall::RecallArgs),
    /// Read a memory by exact name
    Read(read::ReadArgs),
    /// List memories with filters
    List(list::ListArgs),
    /// Soft-delete a memory
    Forget(forget::ForgetArgs),
    /// Permanently delete soft-deleted memories
    Purge(purge::PurgeArgs),
    /// Rename a memory preserving history
    Rename(rename::RenameArgs),
    /// Edit a memory's body or description
    Edit(edit::EditArgs),
    /// List all versions of a memory
    History(history::HistoryArgs),
    /// Restore a memory to a previous version
    Restore(restore::RestoreArgs),
    /// Search using hybrid vector + full-text search
    #[command(after_long_help = "EXAMPLES:\n  \
        # Hybrid search combining KNN + FTS5 BM25 with RRF\n  \
        sqlite-graphrag hybrid-search \"agent memory architecture\"\n\n  \
        # Custom weights for vector vs full-text components\n  \
        sqlite-graphrag hybrid-search \"agent\" --weight-vec 0.7 --weight-fts 0.3")]
    HybridSearch(hybrid_search::HybridSearchArgs),
    /// Show database health
    Health(health::HealthArgs),
    /// Apply pending schema migrations
    Migrate(migrate::MigrateArgs),
    /// Resolve namespace precedence for the current invocation
    NamespaceDetect(namespace_detect::NamespaceDetectArgs),
    /// Run PRAGMA optimize on the database
    Optimize(optimize::OptimizeArgs),
    /// Show database statistics
    Stats(stats::StatsArgs),
    /// Create a checkpointed copy safe for file sync
    SyncSafeCopy(sync_safe_copy::SyncSafeCopyArgs),
    /// Back up the database using the SQLite Online Backup API
    Backup(backup::BackupArgs),
    /// Run VACUUM after checkpointing the WAL
    Vacuum(vacuum::VacuumArgs),
    /// Create an explicit relationship between two entities
    Link(link::LinkArgs),
    /// Remove a specific relationship between two entities
    Unlink(unlink::UnlinkArgs),
    /// Deep parallel multi-hop GraphRAG research
    #[command(name = "deep-research")]
    DeepResearch(deep_research::DeepResearchArgs),
    /// List memories connected via the entity graph
    Related(related::RelatedArgs),
    // NOTE(review): `GraphExportFormat` in this file also defines `Ndjson`. If
    // `GraphArgs` uses that enum, the help line below omits a supported format
    // — confirm and update the help text if so.
    /// Export a graph snapshot in json, dot or mermaid
    Graph(graph_export::GraphArgs),
    /// Export memories as NDJSON (one JSON line per memory, plus a summary line)
    Export(export::ExportArgs),
    /// FTS5 full-text search index management (rebuild or check)
    Fts(fts::FtsArgs),
    /// Vector index maintenance (orphan detection, purge, stats) — G39
    Vec(vec::VecArgs),
    /// List codex OAuth models accepted by ChatGPT Pro (G33).
    #[command(name = "codex-models")]
    CodexModels,
    /// Bulk-delete all relationships of a given type (e.g. mentions)
    PruneRelations(prune_relations::PruneRelationsArgs),
    /// Remove NER bindings (memory_entities rows) for an entity or all entities
    #[command(name = "prune-ner")]
    PruneNer(prune_ner::PruneNerArgs),
    /// Remove entities that have no memories and no relationships
    CleanupOrphans(cleanup_orphans::CleanupOrphansArgs),
    /// List entities linked to a specific memory
    MemoryEntities(memory_entities::MemoryEntitiesArgs),
    /// Manage cached resources (embedding models, etc.)
    Cache(cache::CacheArgs),
    /// Delete an entity and all its relationships from the graph
    #[command(name = "delete-entity")]
    DeleteEntity(delete_entity::DeleteEntityArgs),
    /// Reclassify one entity or a batch of entities to a new type
    Reclassify(reclassify::ReclassifyArgs),
    /// Rename an entity preserving all relationships and memory bindings
    #[command(name = "rename-entity")]
    RenameEntity(rename_entity::RenameEntityArgs),
    /// Merge multiple source entities into a single target entity
    #[command(name = "merge-entities")]
    MergeEntities(merge_entities::MergeEntitiesArgs),
    /// Enrich graph memories and entities using an LLM provider
    Enrich(enrich::EnrichArgs),
    /// Reclassify relationship types across the graph using rules or LLM judgment
    #[command(name = "reclassify-relation")]
    ReclassifyRelation(reclassify_relation::ReclassifyRelationArgs),
    /// Normalize entity names (deduplicate, kebab-case, merge near-duplicates)
    #[command(name = "normalize-entities")]
    NormalizeEntities(normalize_entities::NormalizeEntitiesArgs),
    /// Generate shell completions for Bash, Zsh, Fish, PowerShell, or Elvish
    Completions(completions::CompletionsArgs),
    // Hidden from help (`hide = true`) and intentionally carries no `///` help
    // text; presumably a developer-only diagnostic — confirm.
    #[command(name = "debug-schema", hide = true)]
    DebugSchema(debug_schema::DebugSchemaArgs),
}
403
// Memory categories, parsed from the command line through the
// `clap::ValueEnum` derive (the tests in this file pass `--type project`).
// `Document` is the `Default` variant. `MemoryType::as_str` maps each variant
// to its lowercase name and must be kept in sync when variants are added.
//
// `//` is used instead of `///` on purpose: doc comments on `ValueEnum`
// variants are picked up by clap as user-visible help text.
#[derive(Copy, Clone, Debug, Default, clap::ValueEnum)]
pub enum MemoryType {
    User,
    Feedback,
    Project,
    Reference,
    Decision,
    Incident,
    Skill,
    #[default]
    Document,
    Note,
}
417
#[cfg(test)]
mod heavy_concurrency_tests {
    use super::*;

    /// Parses `argv` and returns the selected subcommand, panicking with
    /// `context` when parsing fails.
    fn parse_command(argv: &[&str], context: &str) -> Commands {
        Cli::try_parse_from(argv.iter().copied())
            .expect(context)
            .command
    }

    /// `init`, `remember`, `recall` and `hybrid-search` are all classified as
    /// embedding-heavy.
    #[test]
    fn command_heavy_detects_init_and_embeddings() {
        let init_argv = ["sqlite-graphrag", "init"];
        assert!(parse_command(&init_argv, "parse init").is_embedding_heavy());

        let remember_argv = [
            "sqlite-graphrag",
            "remember",
            "--name",
            "test-memory",
            "--type",
            "project",
            "--description",
            "desc",
        ];
        assert!(parse_command(&remember_argv, "parse remember").is_embedding_heavy());

        let recall_argv = ["sqlite-graphrag", "recall", "query"];
        assert!(parse_command(&recall_argv, "parse recall").is_embedding_heavy());

        let hybrid_argv = ["sqlite-graphrag", "hybrid-search", "query"];
        assert!(parse_command(&hybrid_argv, "parse hybrid").is_embedding_heavy());
    }

    /// `stats` is not classified as embedding-heavy.
    #[test]
    fn command_light_does_not_mark_stats() {
        let stats_argv = ["sqlite-graphrag", "stats"];
        assert!(!parse_command(&stats_argv, "parse stats").is_embedding_heavy());
    }
}
455
456impl MemoryType {
457    pub fn as_str(&self) -> &'static str {
458        match self {
459            Self::User => "user",
460            Self::Feedback => "feedback",
461            Self::Project => "project",
462            Self::Reference => "reference",
463            Self::Decision => "decision",
464            Self::Incident => "incident",
465            Self::Skill => "skill",
466            Self::Document => "document",
467            Self::Note => "note",
468        }
469    }
470}