Skip to main content

sqlite_graphrag/
cli.rs

1//! CLI argument structs and command surface (clap-based).
2//!
3//! Defines `Cli` and all subcommand enums; contains no business logic.
4
5use crate::commands::*;
6use crate::i18n::{current, Language};
7use clap::{Parser, Subcommand};
8
9/// Common daemon-control options shared across embedding-heavy subcommands.
10#[derive(clap::Args, Debug, Clone)]
11pub struct DaemonOpts {
12    /// Allow the CLI to spawn a background daemon if none is running.
13    ///
14    /// Default `true`. Pass `--autostart-daemon=false` to disable.
15    /// Env var `SQLITE_GRAPHRAG_DAEMON_DISABLE_AUTOSTART=1` is honoured only when this flag is unset.
16    #[arg(long, default_value_t = true, action = clap::ArgAction::Set)]
17    pub autostart_daemon: bool,
18}
19
/// Computes the CPU-based upper bound on simultaneous invocations.
///
/// The bound is twice the parallelism reported by
/// [`std::thread::available_parallelism`]. When that query fails, a fixed
/// fallback of 8 is returned instead.
fn max_concurrency_ceiling() -> usize {
    match std::thread::available_parallelism() {
        Ok(parallelism) => parallelism.get() * 2,
        // The platform could not report its parallelism; use the fixed fallback.
        Err(_) => 8,
    }
}
26
27#[derive(Copy, Clone, Debug, PartialEq, Eq, clap::ValueEnum)]
28pub enum GraphExportFormat {
29    Json,
30    Dot,
31    Mermaid,
32    /// Stream one JSON object per entity, then one per edge, then a summary line.
33    Ndjson,
34}
35
36#[derive(Parser)]
37#[command(name = "sqlite-graphrag")]
38#[command(version)]
39#[command(about = "Local GraphRAG memory for LLMs in a single SQLite file")]
40#[command(arg_required_else_help = true)]
41pub struct Cli {
42    /// Maximum number of simultaneous CLI invocations allowed (default: 4).
43    ///
44    /// Caps the counting semaphore used for CLI concurrency slots. The value must
45    /// stay within [1, 2×nCPUs]. Values above the ceiling are rejected with exit 2.
46    #[arg(long, global = true, value_name = "N")]
47    pub max_concurrency: Option<usize>,
48
49    /// Wait up to SECONDS for a free concurrency slot before giving up (exit 75).
50    ///
51    /// Useful in retrying agent pipelines: the process polls every 500 ms until a
52    /// slot opens or the timeout expires. Default: 300s (5 minutes).
53    #[arg(long, global = true, value_name = "SECONDS")]
54    pub wait_lock: Option<u64>,
55
56    /// Skip the available-memory check before loading the model.
57    ///
58    /// Exclusive use in automated tests where real allocation does not occur.
59    #[arg(long, global = true, hide = true, default_value_t = false)]
60    pub skip_memory_guard: bool,
61
62    /// Language for human-facing stderr messages. Accepts `en` or `pt`.
63    ///
64    /// Without the flag, detection falls back to `SQLITE_GRAPHRAG_LANG` and then
65    /// `LC_ALL`/`LANG`. JSON stdout stays deterministic and identical across
66    /// languages; only human-facing strings are affected.
67    #[arg(long, global = true, value_enum, value_name = "LANG")]
68    pub lang: Option<crate::i18n::Language>,
69
70    /// Time zone for `*_iso` fields in JSON output (for example `America/Sao_Paulo`).
71    ///
72    /// Accepts any IANA time zone name. Without the flag, it falls back to
73    /// `SQLITE_GRAPHRAG_DISPLAY_TZ`; if unset, UTC is used. Integer epoch fields
74    /// are not affected.
75    #[arg(long, global = true, value_name = "IANA")]
76    pub tz: Option<chrono_tz::Tz>,
77
78    /// Increase logging verbosity (-v=info, -vv=debug, -vvv=trace).
79    ///
80    /// Overrides `SQLITE_GRAPHRAG_LOG_LEVEL` env var when present. Logs are emitted
81    /// to stderr; JSON stdout is unaffected.
82    #[arg(short = 'v', long, global = true, action = clap::ArgAction::Count)]
83    pub verbose: u8,
84
85    #[command(subcommand)]
86    pub command: Commands,
87}
88
#[cfg(test)]
mod json_only_format_tests {
    use super::Cli;
    use clap::Parser;

    /// Reports whether `prefix` followed by `--format <format>` parses as a
    /// complete command line.
    fn parses_with_format(prefix: &[&str], format: &str) -> bool {
        let argv = prefix.iter().copied().chain(["--format", format]);
        Cli::try_parse_from(argv).is_ok()
    }

    /// Asserts that `prefix` accepts `--format json` and refuses
    /// `--format <rejected>`, in that order.
    fn assert_json_only(prefix: &[&str], rejected: &str) {
        assert!(
            parses_with_format(prefix, "json"),
            "{prefix:?} should accept --format json"
        );
        assert!(
            !parses_with_format(prefix, rejected),
            "{prefix:?} should reject --format {rejected}"
        );
    }

    #[test]
    fn restore_accepts_only_format_json() {
        assert_json_only(
            &["sqlite-graphrag", "restore", "--name", "mem", "--version", "1"],
            "text",
        );
    }

    #[test]
    fn hybrid_search_accepts_only_format_json() {
        assert_json_only(&["sqlite-graphrag", "hybrid-search", "query"], "markdown");
    }

    #[test]
    fn remember_recall_rename_vacuum_json_only() {
        assert_json_only(
            &[
                "sqlite-graphrag",
                "remember",
                "--name",
                "mem",
                "--type",
                "project",
                "--description",
                "desc",
            ],
            "text",
        );

        assert_json_only(&["sqlite-graphrag", "recall", "query"], "text");

        assert_json_only(
            &["sqlite-graphrag", "rename", "--name", "old", "--new-name", "new"],
            "markdown",
        );

        assert_json_only(&["sqlite-graphrag", "vacuum"], "text");
    }
}
207
208impl Cli {
209    /// Validates concurrency flags and returns a localised descriptive error if invalid.
210    ///
211    /// Requires that `crate::i18n::init()` has already been called (happens before this
212    /// function in the `main` flow). In English it emits EN messages; in Portuguese it emits PT.
213    pub fn validate_flags(&self) -> Result<(), String> {
214        if let Some(n) = self.max_concurrency {
215            if n == 0 {
216                return Err(match current() {
217                    Language::English => "--max-concurrency must be >= 1".to_string(),
218                    Language::Portuguese => "--max-concurrency deve ser >= 1".to_string(),
219                });
220            }
221            let teto = max_concurrency_ceiling();
222            if n > teto {
223                return Err(match current() {
224                    Language::English => format!(
225                        "--max-concurrency {n} exceeds the ceiling of {teto} (2×nCPUs) on this system"
226                    ),
227                    Language::Portuguese => format!(
228                        "--max-concurrency {n} excede o teto de {teto} (2×nCPUs) neste sistema"
229                    ),
230                });
231            }
232        }
233        Ok(())
234    }
235}
236
237impl Commands {
238    /// Returns true for subcommands that load the ONNX model locally.
239    pub fn is_embedding_heavy(&self) -> bool {
240        matches!(
241            self,
242            Self::Init(_) | Self::Remember(_) | Self::Recall(_) | Self::HybridSearch(_)
243        )
244    }
245
246    pub fn uses_cli_slot(&self) -> bool {
247        !matches!(self, Self::Daemon(_))
248    }
249}
250
251#[derive(Subcommand)]
252pub enum Commands {
253    /// Initialize database and download embedding model
254    #[command(after_long_help = "EXAMPLES:\n  \
255        # Initialize in current directory (default behavior)\n  \
256        sqlite-graphrag init\n\n  \
257        # Initialize at a specific path\n  \
258        sqlite-graphrag init --db /path/to/graphrag.sqlite\n\n  \
259        # Initialize using SQLITE_GRAPHRAG_HOME env var\n  \
260        SQLITE_GRAPHRAG_HOME=/data sqlite-graphrag init\n\n\
261        NOTES:\n  \
262        - `init` is OPTIONAL: any subsequent CRUD command auto-initializes graphrag.sqlite if missing.\n  \
263        - As a side effect, `init` warms a smoke-test embedding which auto-spawns the persistent daemon (~600s idle timeout).")]
264    Init(init::InitArgs),
265    /// Run or control the persistent embedding daemon
266    Daemon(daemon::DaemonArgs),
267    /// Save a memory with optional entity graph
268    #[command(after_long_help = "EXAMPLES:\n  \
269        # Inline body\n  \
270        sqlite-graphrag remember --name onboarding --type user --description \"intro\" --body \"hello\"\n\n  \
271        # Body from file\n  \
272        sqlite-graphrag remember --name doc1 --type document --description \"...\" --body-file ./README.md\n\n  \
273        # Body from stdin (pipe)\n  \
274        cat README.md | sqlite-graphrag remember --name doc1 --type document --description \"...\" --body-stdin\n\n  \
275        # Enable GLiNER entity extraction (disabled by default)\n  \
276        sqlite-graphrag remember --name rich --type note --description \"...\" --body \"...\" --enable-ner")]
277    Remember(remember::RememberArgs),
278    /// Bulk-ingest every file under a directory as separate memories (NDJSON output)
279    Ingest(ingest::IngestArgs),
280    /// Search memories semantically
281    #[command(after_long_help = "EXAMPLES:\n  \
282        # Top 10 semantic matches (default)\n  \
283        sqlite-graphrag recall \"agent memory\"\n\n  \
284        # Top 3 only\n  \
285        sqlite-graphrag recall \"agent memory\" -k 3\n\n  \
286        # Search across all namespaces\n  \
287        sqlite-graphrag recall \"agent memory\" --all-namespaces\n\n  \
288        # Disable graph traversal (vector-only)\n  \
289        sqlite-graphrag recall \"agent memory\" --no-graph")]
290    Recall(recall::RecallArgs),
291    /// Read a memory by exact name
292    Read(read::ReadArgs),
293    /// List memories with filters
294    List(list::ListArgs),
295    /// Soft-delete a memory
296    Forget(forget::ForgetArgs),
297    /// Permanently delete soft-deleted memories
298    Purge(purge::PurgeArgs),
299    /// Rename a memory preserving history
300    Rename(rename::RenameArgs),
301    /// Edit a memory's body or description
302    Edit(edit::EditArgs),
303    /// List all versions of a memory
304    History(history::HistoryArgs),
305    /// Restore a memory to a previous version
306    Restore(restore::RestoreArgs),
307    /// Search using hybrid vector + full-text search
308    #[command(after_long_help = "EXAMPLES:\n  \
309        # Hybrid search combining KNN + FTS5 BM25 with RRF\n  \
310        sqlite-graphrag hybrid-search \"agent memory architecture\"\n\n  \
311        # Custom weights for vector vs full-text components\n  \
312        sqlite-graphrag hybrid-search \"agent\" --weight-vec 0.7 --weight-fts 0.3")]
313    HybridSearch(hybrid_search::HybridSearchArgs),
314    /// Show database health
315    Health(health::HealthArgs),
316    /// Apply pending schema migrations
317    Migrate(migrate::MigrateArgs),
318    /// Resolve namespace precedence for the current invocation
319    NamespaceDetect(namespace_detect::NamespaceDetectArgs),
320    /// Run PRAGMA optimize on the database
321    Optimize(optimize::OptimizeArgs),
322    /// Show database statistics
323    Stats(stats::StatsArgs),
324    /// Create a checkpointed copy safe for file sync
325    SyncSafeCopy(sync_safe_copy::SyncSafeCopyArgs),
326    /// Back up the database using the SQLite Online Backup API
327    Backup(backup::BackupArgs),
328    /// Run VACUUM after checkpointing the WAL
329    Vacuum(vacuum::VacuumArgs),
330    /// Create an explicit relationship between two entities
331    Link(link::LinkArgs),
332    /// Remove a specific relationship between two entities
333    Unlink(unlink::UnlinkArgs),
334    /// List memories connected via the entity graph
335    Related(related::RelatedArgs),
336    /// Export a graph snapshot in json, dot or mermaid
337    Graph(graph_export::GraphArgs),
338    /// Export memories as NDJSON (one JSON line per memory, plus a summary line)
339    Export(export::ExportArgs),
340    /// FTS5 full-text search index management (rebuild or check)
341    Fts(fts::FtsArgs),
342    /// Bulk-delete all relationships of a given type (e.g. mentions)
343    PruneRelations(prune_relations::PruneRelationsArgs),
344    /// Remove NER bindings (memory_entities rows) for an entity or all entities
345    #[command(name = "prune-ner")]
346    PruneNer(prune_ner::PruneNerArgs),
347    /// Remove entities that have no memories and no relationships
348    CleanupOrphans(cleanup_orphans::CleanupOrphansArgs),
349    /// List entities linked to a specific memory
350    MemoryEntities(memory_entities::MemoryEntitiesArgs),
351    /// Manage cached resources (embedding models, etc.)
352    Cache(cache::CacheArgs),
353    /// Delete an entity and all its relationships from the graph
354    #[command(name = "delete-entity")]
355    DeleteEntity(delete_entity::DeleteEntityArgs),
356    /// Reclassify one entity or a batch of entities to a new type
357    Reclassify(reclassify::ReclassifyArgs),
358    /// Rename an entity preserving all relationships and memory bindings
359    #[command(name = "rename-entity")]
360    RenameEntity(rename_entity::RenameEntityArgs),
361    /// Merge multiple source entities into a single target entity
362    #[command(name = "merge-entities")]
363    MergeEntities(merge_entities::MergeEntitiesArgs),
364    #[command(name = "__debug_schema", hide = true)]
365    DebugSchema(debug_schema::DebugSchemaArgs),
366}
367
368#[derive(Copy, Clone, Debug, Default, clap::ValueEnum)]
369pub enum MemoryType {
370    User,
371    Feedback,
372    Project,
373    Reference,
374    Decision,
375    Incident,
376    Skill,
377    #[default]
378    Document,
379    Note,
380}
381
#[cfg(test)]
mod heavy_concurrency_tests {
    use super::*;

    /// Parses `argv` and reports whether the selected subcommand is
    /// embedding-heavy.
    ///
    /// Panics with `context` when the command line does not parse.
    fn is_heavy(argv: &[&str], context: &str) -> bool {
        Cli::try_parse_from(argv.iter().copied())
            .expect(context)
            .command
            .is_embedding_heavy()
    }

    #[test]
    fn command_heavy_detects_init_and_embeddings() {
        assert!(is_heavy(&["sqlite-graphrag", "init"], "parse init"));

        assert!(is_heavy(
            &[
                "sqlite-graphrag",
                "remember",
                "--name",
                "test-memory",
                "--type",
                "project",
                "--description",
                "desc",
            ],
            "parse remember",
        ));

        assert!(is_heavy(
            &["sqlite-graphrag", "recall", "query"],
            "parse recall",
        ));

        assert!(is_heavy(
            &["sqlite-graphrag", "hybrid-search", "query"],
            "parse hybrid",
        ));
    }

    #[test]
    fn command_light_does_not_mark_stats() {
        assert!(!is_heavy(&["sqlite-graphrag", "stats"], "parse stats"));
    }
}
419
420impl MemoryType {
421    pub fn as_str(&self) -> &'static str {
422        match self {
423            Self::User => "user",
424            Self::Feedback => "feedback",
425            Self::Project => "project",
426            Self::Reference => "reference",
427            Self::Decision => "decision",
428            Self::Incident => "incident",
429            Self::Skill => "skill",
430            Self::Document => "document",
431            Self::Note => "note",
432        }
433    }
434}