// sqlite_graphrag/cli.rs

//! CLI argument structs and command surface (clap-based).
//!
//! Defines `Cli` and all subcommand enums; contains no business logic.
4
5use crate::commands::*;
6use crate::i18n::{current, Language};
7use clap::{Parser, Subcommand};
8
9/// Common daemon-control options shared across embedding-heavy subcommands.
10#[derive(clap::Args, Debug, Clone)]
11pub struct DaemonOpts {
12    /// Allow the CLI to spawn a background daemon if none is running.
13    ///
14    /// Default `true`. Pass `--autostart-daemon=false` to disable.
15    /// Env var `SQLITE_GRAPHRAG_DAEMON_DISABLE_AUTOSTART=1` is honoured only when this flag is unset.
16    #[arg(long, default_value_t = true, action = clap::ArgAction::Set)]
17    pub autostart_daemon: bool,
18}
19
/// Computes the upper bound accepted for `--max-concurrency`.
///
/// The bound is twice the parallelism reported by
/// `std::thread::available_parallelism`. When that query fails, a fixed
/// fallback of 8 is returned instead.
fn max_concurrency_ceiling() -> usize {
    // Used only when the platform cannot report its available parallelism.
    const FALLBACK_CEILING: usize = 8;

    match std::thread::available_parallelism() {
        Ok(cpus) => cpus.get() * 2,
        Err(_) => FALLBACK_CEILING,
    }
}
26
27#[derive(Copy, Clone, Debug, clap::ValueEnum)]
28pub enum RelationKind {
29    AppliesTo,
30    Uses,
31    DependsOn,
32    Causes,
33    Fixes,
34    Contradicts,
35    Supports,
36    Follows,
37    Related,
38    Mentions,
39    Replaces,
40    TrackedIn,
41}
42
43impl RelationKind {
44    pub fn as_str(&self) -> &'static str {
45        match self {
46            Self::AppliesTo => "applies_to",
47            Self::Uses => "uses",
48            Self::DependsOn => "depends_on",
49            Self::Causes => "causes",
50            Self::Fixes => "fixes",
51            Self::Contradicts => "contradicts",
52            Self::Supports => "supports",
53            Self::Follows => "follows",
54            Self::Related => "related",
55            Self::Mentions => "mentions",
56            Self::Replaces => "replaces",
57            Self::TrackedIn => "tracked_in",
58        }
59    }
60}
61
62#[derive(Copy, Clone, Debug, clap::ValueEnum)]
63pub enum GraphExportFormat {
64    Json,
65    Dot,
66    Mermaid,
67}
68
69#[derive(Parser)]
70#[command(name = "sqlite-graphrag")]
71#[command(version)]
72#[command(about = "Local GraphRAG memory for LLMs in a single SQLite file")]
73#[command(arg_required_else_help = true)]
74pub struct Cli {
75    /// Maximum number of simultaneous CLI invocations allowed (default: 4).
76    ///
77    /// Caps the counting semaphore used for CLI concurrency slots. The value must
78    /// stay within [1, 2×nCPUs]. Values above the ceiling are rejected with exit 2.
79    #[arg(long, global = true, value_name = "N")]
80    pub max_concurrency: Option<usize>,
81
82    /// Wait up to SECONDS for a free concurrency slot before giving up (exit 75).
83    ///
84    /// Useful in retrying agent pipelines: the process polls every 500 ms until a
85    /// slot opens or the timeout expires. Default: 300s (5 minutes).
86    #[arg(long, global = true, value_name = "SECONDS")]
87    pub wait_lock: Option<u64>,
88
89    /// Skip the available-memory check before loading the model.
90    ///
91    /// Exclusive use in automated tests where real allocation does not occur.
92    #[arg(long, global = true, hide = true, default_value_t = false)]
93    pub skip_memory_guard: bool,
94
95    /// Language for human-facing stderr messages. Accepts `en` or `pt`.
96    ///
97    /// Without the flag, detection falls back to `SQLITE_GRAPHRAG_LANG` and then
98    /// `LC_ALL`/`LANG`. JSON stdout stays deterministic and identical across
99    /// languages; only human-facing strings are affected.
100    #[arg(long, global = true, value_enum, value_name = "LANG")]
101    pub lang: Option<crate::i18n::Language>,
102
103    /// Time zone for `*_iso` fields in JSON output (for example `America/Sao_Paulo`).
104    ///
105    /// Accepts any IANA time zone name. Without the flag, it falls back to
106    /// `SQLITE_GRAPHRAG_DISPLAY_TZ`; if unset, UTC is used. Integer epoch fields
107    /// are not affected.
108    #[arg(long, global = true, value_name = "IANA")]
109    pub tz: Option<chrono_tz::Tz>,
110
111    /// Increase logging verbosity (-v=info, -vv=debug, -vvv=trace).
112    ///
113    /// Overrides `SQLITE_GRAPHRAG_LOG_LEVEL` env var when present. Logs are emitted
114    /// to stderr; JSON stdout is unaffected.
115    #[arg(short = 'v', long, global = true, action = clap::ArgAction::Count)]
116    pub verbose: u8,
117
118    #[command(subcommand)]
119    pub command: Commands,
120}
121
#[cfg(test)]
mod json_only_format_tests {
    use super::Cli;
    use clap::Parser;

    /// Reports whether clap accepts `base` followed by `--format <format>`.
    fn parses_with_format<'a>(base: &[&'a str], format: &'a str) -> bool {
        let argv = base.iter().copied().chain(["--format", format]);
        Cli::try_parse_from(argv).is_ok()
    }

    #[test]
    fn restore_accepts_only_format_json() {
        let base = [
            "sqlite-graphrag",
            "restore",
            "--name",
            "mem",
            "--version",
            "1",
        ];

        assert!(parses_with_format(&base, "json"));
        assert!(!parses_with_format(&base, "text"));
    }

    #[test]
    fn hybrid_search_accepts_only_format_json() {
        let base = ["sqlite-graphrag", "hybrid-search", "query"];

        assert!(parses_with_format(&base, "json"));
        assert!(!parses_with_format(&base, "markdown"));
    }

    #[test]
    fn remember_recall_rename_vacuum_json_only() {
        // remember
        let remember = [
            "sqlite-graphrag",
            "remember",
            "--name",
            "mem",
            "--type",
            "project",
            "--description",
            "desc",
        ];
        assert!(parses_with_format(&remember, "json"));
        assert!(!parses_with_format(&remember, "text"));

        // recall
        let recall = ["sqlite-graphrag", "recall", "query"];
        assert!(parses_with_format(&recall, "json"));
        assert!(!parses_with_format(&recall, "text"));

        // rename
        let rename = [
            "sqlite-graphrag",
            "rename",
            "--name",
            "old",
            "--new-name",
            "new",
        ];
        assert!(parses_with_format(&rename, "json"));
        assert!(!parses_with_format(&rename, "markdown"));

        // vacuum
        let vacuum = ["sqlite-graphrag", "vacuum"];
        assert!(parses_with_format(&vacuum, "json"));
        assert!(!parses_with_format(&vacuum, "text"));
    }
}
240
241impl Cli {
242    /// Validates concurrency flags and returns a localised descriptive error if invalid.
243    ///
244    /// Requires that `crate::i18n::init()` has already been called (happens before this
245    /// function in the `main` flow). In English it emits EN messages; in Portuguese it emits PT.
246    pub fn validate_flags(&self) -> Result<(), String> {
247        if let Some(n) = self.max_concurrency {
248            if n == 0 {
249                return Err(match current() {
250                    Language::English => "--max-concurrency must be >= 1".to_string(),
251                    Language::Portuguese => "--max-concurrency deve ser >= 1".to_string(),
252                });
253            }
254            let teto = max_concurrency_ceiling();
255            if n > teto {
256                return Err(match current() {
257                    Language::English => format!(
258                        "--max-concurrency {n} exceeds the ceiling of {teto} (2×nCPUs) on this system"
259                    ),
260                    Language::Portuguese => format!(
261                        "--max-concurrency {n} excede o teto de {teto} (2×nCPUs) neste sistema"
262                    ),
263                });
264            }
265        }
266        Ok(())
267    }
268}
269
270impl Commands {
271    /// Returns true for subcommands that load the ONNX model locally.
272    pub fn is_embedding_heavy(&self) -> bool {
273        matches!(
274            self,
275            Self::Init(_) | Self::Remember(_) | Self::Recall(_) | Self::HybridSearch(_)
276        )
277    }
278
279    pub fn uses_cli_slot(&self) -> bool {
280        !matches!(self, Self::Daemon(_))
281    }
282}
283
284#[derive(Subcommand)]
285pub enum Commands {
286    /// Initialize database and download embedding model
287    #[command(after_long_help = "EXAMPLES:\n  \
288        # Initialize in current directory (default behavior)\n  \
289        sqlite-graphrag init\n\n  \
290        # Initialize at a specific path\n  \
291        sqlite-graphrag init --db /path/to/graphrag.sqlite\n\n  \
292        # Initialize using SQLITE_GRAPHRAG_HOME env var\n  \
293        SQLITE_GRAPHRAG_HOME=/data sqlite-graphrag init\n\n\
294        NOTES:\n  \
295        - `init` is OPTIONAL: any subsequent CRUD command auto-initializes graphrag.sqlite if missing.\n  \
296        - As a side effect, `init` warms a smoke-test embedding which auto-spawns the persistent daemon (~600s idle timeout).")]
297    Init(init::InitArgs),
298    /// Run or control the persistent embedding daemon
299    Daemon(daemon::DaemonArgs),
300    /// Save a memory with optional entity graph
301    #[command(after_long_help = "EXAMPLES:\n  \
302        # Inline body\n  \
303        sqlite-graphrag remember --name onboarding --type user --description \"intro\" --body \"hello\"\n\n  \
304        # Body from file\n  \
305        sqlite-graphrag remember --name doc1 --type document --description \"...\" --body-file ./README.md\n\n  \
306        # Body from stdin (pipe)\n  \
307        cat README.md | sqlite-graphrag remember --name doc1 --type document --description \"...\" --body-stdin\n\n  \
308        # Skip BERT entity extraction (faster)\n  \
309        sqlite-graphrag remember --name quick --type note --description \"...\" --body \"...\" --skip-extraction")]
310    Remember(remember::RememberArgs),
311    /// Bulk-ingest every file under a directory as separate memories (NDJSON output)
312    Ingest(ingest::IngestArgs),
313    /// Search memories semantically
314    #[command(after_long_help = "EXAMPLES:\n  \
315        # Top 10 semantic matches (default)\n  \
316        sqlite-graphrag recall \"agent memory\"\n\n  \
317        # Top 3 only\n  \
318        sqlite-graphrag recall \"agent memory\" -k 3\n\n  \
319        # Search across all namespaces\n  \
320        sqlite-graphrag recall \"agent memory\" --all-namespaces\n\n  \
321        # Disable graph traversal (vector-only)\n  \
322        sqlite-graphrag recall \"agent memory\" --no-graph")]
323    Recall(recall::RecallArgs),
324    /// Read a memory by exact name
325    Read(read::ReadArgs),
326    /// List memories with filters
327    List(list::ListArgs),
328    /// Soft-delete a memory
329    Forget(forget::ForgetArgs),
330    /// Permanently delete soft-deleted memories
331    Purge(purge::PurgeArgs),
332    /// Rename a memory preserving history
333    Rename(rename::RenameArgs),
334    /// Edit a memory's body or description
335    Edit(edit::EditArgs),
336    /// List all versions of a memory
337    History(history::HistoryArgs),
338    /// Restore a memory to a previous version
339    Restore(restore::RestoreArgs),
340    /// Search using hybrid vector + full-text search
341    #[command(after_long_help = "EXAMPLES:\n  \
342        # Hybrid search combining KNN + FTS5 BM25 with RRF\n  \
343        sqlite-graphrag hybrid-search \"agent memory architecture\"\n\n  \
344        # Custom weights for vector vs full-text components\n  \
345        sqlite-graphrag hybrid-search \"agent\" --weight-vec 0.7 --weight-fts 0.3")]
346    HybridSearch(hybrid_search::HybridSearchArgs),
347    /// Show database health
348    Health(health::HealthArgs),
349    /// Apply pending schema migrations
350    Migrate(migrate::MigrateArgs),
351    /// Resolve namespace precedence for the current invocation
352    NamespaceDetect(namespace_detect::NamespaceDetectArgs),
353    /// Run PRAGMA optimize on the database
354    Optimize(optimize::OptimizeArgs),
355    /// Show database statistics
356    Stats(stats::StatsArgs),
357    /// Create a checkpointed copy safe for file sync
358    SyncSafeCopy(sync_safe_copy::SyncSafeCopyArgs),
359    /// Run VACUUM after checkpointing the WAL
360    Vacuum(vacuum::VacuumArgs),
361    /// Create an explicit relationship between two entities
362    Link(link::LinkArgs),
363    /// Remove a specific relationship between two entities
364    Unlink(unlink::UnlinkArgs),
365    /// List memories connected via the entity graph
366    Related(related::RelatedArgs),
367    /// Export a graph snapshot in json, dot or mermaid
368    Graph(graph_export::GraphArgs),
369    /// Remove entities that have no memories and no relationships
370    CleanupOrphans(cleanup_orphans::CleanupOrphansArgs),
371    /// Manage cached resources (embedding models, etc.)
372    Cache(cache::CacheArgs),
373    #[command(name = "__debug_schema", hide = true)]
374    DebugSchema(debug_schema::DebugSchemaArgs),
375}
376
377#[derive(Copy, Clone, Debug, clap::ValueEnum)]
378pub enum MemoryType {
379    User,
380    Feedback,
381    Project,
382    Reference,
383    Decision,
384    Incident,
385    Skill,
386    Document,
387    Note,
388}
389
#[cfg(test)]
mod heavy_concurrency_tests {
    use super::*;

    /// Parses `args` into a `Cli`, panicking with `context` if clap rejects them.
    fn parse(args: &[&str], context: &str) -> Cli {
        Cli::try_parse_from(args.iter().copied()).expect(context)
    }

    #[test]
    fn command_heavy_detects_init_and_embeddings() {
        let init = parse(&["sqlite-graphrag", "init"], "parse init");
        assert!(init.command.is_embedding_heavy());

        let remember = parse(
            &[
                "sqlite-graphrag",
                "remember",
                "--name",
                "test-memory",
                "--type",
                "project",
                "--description",
                "desc",
            ],
            "parse remember",
        );
        assert!(remember.command.is_embedding_heavy());

        let recall = parse(&["sqlite-graphrag", "recall", "query"], "parse recall");
        assert!(recall.command.is_embedding_heavy());

        let hybrid = parse(
            &["sqlite-graphrag", "hybrid-search", "query"],
            "parse hybrid",
        );
        assert!(hybrid.command.is_embedding_heavy());
    }

    #[test]
    fn command_light_does_not_mark_stats() {
        let stats = parse(&["sqlite-graphrag", "stats"], "parse stats");
        assert!(!stats.command.is_embedding_heavy());
    }
}
427
428impl MemoryType {
429    pub fn as_str(&self) -> &'static str {
430        match self {
431            Self::User => "user",
432            Self::Feedback => "feedback",
433            Self::Project => "project",
434            Self::Reference => "reference",
435            Self::Decision => "decision",
436            Self::Incident => "incident",
437            Self::Skill => "skill",
438            Self::Document => "document",
439            Self::Note => "note",
440        }
441    }
442}