Skip to main content

sqlite_graphrag/
cli.rs

1//! CLI argument structs and command surface (clap-based).
2//!
3//! Defines `Cli` and all subcommand enums; contains no business logic.
4
5use crate::commands::*;
6use crate::i18n::{current, Language};
7use clap::{Parser, Subcommand};
8
/// Common daemon-control options shared across embedding-heavy subcommands.
// Derives `clap::Args` (not `Parser`), so this struct is a reusable group of
// arguments rather than a standalone command line.
// NOTE(review): presumably embedded into the subcommand argument structs in
// `crate::commands` (e.g. via `#[command(flatten)]`) — confirm there.
#[derive(clap::Args, Debug, Clone)]
pub struct DaemonOpts {
    /// Allow the CLI to spawn a background daemon if none is running.
    ///
    /// Default `true`. Pass `--autostart-daemon=false` to disable.
    /// Env var `SQLITE_GRAPHRAG_DAEMON_DISABLE_AUTOSTART=1` is honoured only when this flag is unset.
    // The `///` text above is rendered by clap as this option's help output, so
    // treat it as a user-facing string.
    // `ArgAction::Set` makes this a value-taking option instead of a presence
    // switch, which is what allows an explicit `=false`; `default_value_t`
    // supplies `true` when the option is omitted.
    #[arg(long, default_value_t = true, action = clap::ArgAction::Set)]
    pub autostart_daemon: bool,
}
19
/// Computes the upper bound accepted for `--max-concurrency`.
///
/// The bound is twice the parallelism reported by
/// [`std::thread::available_parallelism`]. When that query fails, a fixed
/// fallback of `8` is returned instead.
fn max_concurrency_ceiling() -> usize {
    match std::thread::available_parallelism() {
        Ok(parallelism) => parallelism.get() * 2,
        // The platform could not report a core count; use the fixed fallback.
        Err(_) => 8,
    }
}
26
/// Output formats available for a graph export.
// `clap::ValueEnum` turns each variant into an accepted command-line value; with
// clap's default naming these are `json`, `dot` and `mermaid`.
// NOTE(review): presumably the value type of a format option on the `graph`
// subcommand — the consuming argument struct is not visible in this file.
#[derive(Copy, Clone, Debug, clap::ValueEnum)]
pub enum GraphExportFormat {
    Json,
    Dot,
    Mermaid,
}
33
// Top-level command-line definition. clap's `Parser` derive builds the argument
// parser from this struct: the `#[command(...)]` attributes set the program
// name, enable the version flag, set the one-line description, and make clap
// print help when the binary is run without arguments.
//
// Every flag below is declared `global = true`, so it is also accepted after
// the subcommand name. The `///` comments on the fields are rendered by clap as
// `--help` text — edit them as user-facing strings, not as internal notes.
#[derive(Parser)]
#[command(name = "sqlite-graphrag")]
#[command(version)]
#[command(about = "Local GraphRAG memory for LLMs in a single SQLite file")]
#[command(arg_required_else_help = true)]
pub struct Cli {
    /// Maximum number of simultaneous CLI invocations allowed (default: 4).
    ///
    /// Caps the counting semaphore used for CLI concurrency slots. The value must
    /// stay within [1, 2×nCPUs]. Values above the ceiling are rejected with exit 2.
    // `None` when the flag is absent. The range check itself lives in
    // `Cli::validate_flags`; NOTE(review): the documented default of 4 is not
    // applied here — presumably by the code that consumes this field.
    #[arg(long, global = true, value_name = "N")]
    pub max_concurrency: Option<usize>,

    /// Wait up to SECONDS for a free concurrency slot before giving up (exit 75).
    ///
    /// Useful in retrying agent pipelines: the process polls every 500 ms until a
    /// slot opens or the timeout expires. Default: 300s (5 minutes).
    // `None` when the flag is absent. NOTE(review): the 300s default and the
    // polling described above are implemented elsewhere, not in this file.
    #[arg(long, global = true, value_name = "SECONDS")]
    pub wait_lock: Option<u64>,

    /// Skip the available-memory check before loading the model.
    ///
    /// Exclusive use in automated tests where real allocation does not occur.
    // `hide = true` keeps this flag out of the generated help output.
    #[arg(long, global = true, hide = true, default_value_t = false)]
    pub skip_memory_guard: bool,

    /// Language for human-facing stderr messages. Accepts `en` or `pt`.
    ///
    /// Without the flag, detection falls back to `SQLITE_GRAPHRAG_LANG` and then
    /// `LC_ALL`/`LANG`. JSON stdout stays deterministic and identical across
    /// languages; only human-facing strings are affected.
    // `None` when the flag is absent; the fallback chain described above is not
    // implemented in this file.
    #[arg(long, global = true, value_enum, value_name = "LANG")]
    pub lang: Option<crate::i18n::Language>,

    /// Time zone for `*_iso` fields in JSON output (for example `America/Sao_Paulo`).
    ///
    /// Accepts any IANA time zone name. Without the flag, it falls back to
    /// `SQLITE_GRAPHRAG_DISPLAY_TZ`; if unset, UTC is used. Integer epoch fields
    /// are not affected.
    // The value is parsed directly into `chrono_tz::Tz`, so an unrecognised zone
    // name is presumably rejected during argument parsing — TODO confirm.
    #[arg(long, global = true, value_name = "IANA")]
    pub tz: Option<chrono_tz::Tz>,

    /// Increase logging verbosity (-v=info, -vv=debug, -vvv=trace).
    ///
    /// Overrides `SQLITE_GRAPHRAG_LOG_LEVEL` env var when present. Logs are emitted
    /// to stderr; JSON stdout is unaffected.
    // `ArgAction::Count` stores how many times the flag was given (0 if never).
    #[arg(short = 'v', long, global = true, action = clap::ArgAction::Count)]
    pub verbose: u8,

    // The selected subcommand together with its own parsed arguments.
    #[command(subcommand)]
    pub command: Commands,
}
86
#[cfg(test)]
mod json_only_format_tests {
    use super::Cli;
    use clap::Parser;

    /// Reports whether `base` followed by `--format <format>` is accepted by the parser.
    fn parses_with_format<'a>(base: &[&'a str], format: &'a str) -> bool {
        let mut argv = base.to_vec();
        argv.extend(["--format", format]);
        Cli::try_parse_from(argv).is_ok()
    }

    /// Asserts that `base` accepts `--format json` and refuses `--format <rejected>`.
    #[track_caller]
    fn assert_json_only(base: &[&str], rejected: &str) {
        assert!(
            parses_with_format(base, "json"),
            "`--format json` should be accepted for {base:?}"
        );
        assert!(
            !parses_with_format(base, rejected),
            "`--format {rejected}` should be rejected for {base:?}"
        );
    }

    #[test]
    fn restore_accepts_only_format_json() {
        assert_json_only(
            &["sqlite-graphrag", "restore", "--name", "mem", "--version", "1"],
            "text",
        );
    }

    #[test]
    fn hybrid_search_accepts_only_format_json() {
        assert_json_only(&["sqlite-graphrag", "hybrid-search", "query"], "markdown");
    }

    #[test]
    fn remember_recall_rename_vacuum_json_only() {
        assert_json_only(
            &[
                "sqlite-graphrag",
                "remember",
                "--name",
                "mem",
                "--type",
                "project",
                "--description",
                "desc",
            ],
            "text",
        );

        assert_json_only(&["sqlite-graphrag", "recall", "query"], "text");

        assert_json_only(
            &["sqlite-graphrag", "rename", "--name", "old", "--new-name", "new"],
            "markdown",
        );

        assert_json_only(&["sqlite-graphrag", "vacuum"], "text");
    }
}
205
206impl Cli {
207    /// Validates concurrency flags and returns a localised descriptive error if invalid.
208    ///
209    /// Requires that `crate::i18n::init()` has already been called (happens before this
210    /// function in the `main` flow). In English it emits EN messages; in Portuguese it emits PT.
211    pub fn validate_flags(&self) -> Result<(), String> {
212        if let Some(n) = self.max_concurrency {
213            if n == 0 {
214                return Err(match current() {
215                    Language::English => "--max-concurrency must be >= 1".to_string(),
216                    Language::Portuguese => "--max-concurrency deve ser >= 1".to_string(),
217                });
218            }
219            let teto = max_concurrency_ceiling();
220            if n > teto {
221                return Err(match current() {
222                    Language::English => format!(
223                        "--max-concurrency {n} exceeds the ceiling of {teto} (2×nCPUs) on this system"
224                    ),
225                    Language::Portuguese => format!(
226                        "--max-concurrency {n} excede o teto de {teto} (2×nCPUs) neste sistema"
227                    ),
228                });
229            }
230        }
231        Ok(())
232    }
233}
234
235impl Commands {
236    /// Returns true for subcommands that load the ONNX model locally.
237    pub fn is_embedding_heavy(&self) -> bool {
238        matches!(
239            self,
240            Self::Init(_) | Self::Remember(_) | Self::Recall(_) | Self::HybridSearch(_)
241        )
242    }
243
244    pub fn uses_cli_slot(&self) -> bool {
245        !matches!(self, Self::Daemon(_))
246    }
247}
248
// The full set of subcommands. clap's `Subcommand` derive maps each variant to a
// subcommand named after the variant in kebab-case (e.g. `HybridSearch` is
// invoked as `hybrid-search`), carrying that subcommand's argument struct from
// `crate::commands`.
//
// Everything textual here is user-facing: the `///` line on a variant is its
// description in `--help`, and `after_long_help` is appended to the subcommand's
// long help. With clap's default settings, subcommands are listed in help in the
// order the variants are declared, so reordering variants changes the output.
//
// When adding a variant, also review `Commands::is_embedding_heavy` and
// `Commands::uses_cli_slot`, which classify subcommands by variant.
#[derive(Subcommand)]
pub enum Commands {
    /// Initialize database and download embedding model
    #[command(after_long_help = "EXAMPLES:\n  \
        # Initialize in current directory (default behavior)\n  \
        sqlite-graphrag init\n\n  \
        # Initialize at a specific path\n  \
        sqlite-graphrag init --db /path/to/graphrag.sqlite\n\n  \
        # Initialize using SQLITE_GRAPHRAG_HOME env var\n  \
        SQLITE_GRAPHRAG_HOME=/data sqlite-graphrag init\n\n\
        NOTES:\n  \
        - `init` is OPTIONAL: any subsequent CRUD command auto-initializes graphrag.sqlite if missing.\n  \
        - As a side effect, `init` warms a smoke-test embedding which auto-spawns the persistent daemon (~600s idle timeout).")]
    Init(init::InitArgs),
    /// Run or control the persistent embedding daemon
    Daemon(daemon::DaemonArgs),
    /// Save a memory with optional entity graph
    #[command(after_long_help = "EXAMPLES:\n  \
        # Inline body\n  \
        sqlite-graphrag remember --name onboarding --type user --description \"intro\" --body \"hello\"\n\n  \
        # Body from file\n  \
        sqlite-graphrag remember --name doc1 --type document --description \"...\" --body-file ./README.md\n\n  \
        # Body from stdin (pipe)\n  \
        cat README.md | sqlite-graphrag remember --name doc1 --type document --description \"...\" --body-stdin\n\n  \
        # Enable GLiNER entity extraction (disabled by default)\n  \
        sqlite-graphrag remember --name rich --type note --description \"...\" --body \"...\" --enable-ner")]
    Remember(remember::RememberArgs),
    /// Bulk-ingest every file under a directory as separate memories (NDJSON output)
    Ingest(ingest::IngestArgs),
    /// Search memories semantically
    #[command(after_long_help = "EXAMPLES:\n  \
        # Top 10 semantic matches (default)\n  \
        sqlite-graphrag recall \"agent memory\"\n\n  \
        # Top 3 only\n  \
        sqlite-graphrag recall \"agent memory\" -k 3\n\n  \
        # Search across all namespaces\n  \
        sqlite-graphrag recall \"agent memory\" --all-namespaces\n\n  \
        # Disable graph traversal (vector-only)\n  \
        sqlite-graphrag recall \"agent memory\" --no-graph")]
    Recall(recall::RecallArgs),
    /// Read a memory by exact name
    Read(read::ReadArgs),
    /// List memories with filters
    List(list::ListArgs),
    /// Soft-delete a memory
    Forget(forget::ForgetArgs),
    /// Permanently delete soft-deleted memories
    Purge(purge::PurgeArgs),
    /// Rename a memory preserving history
    Rename(rename::RenameArgs),
    /// Edit a memory's body or description
    Edit(edit::EditArgs),
    /// List all versions of a memory
    History(history::HistoryArgs),
    /// Restore a memory to a previous version
    Restore(restore::RestoreArgs),
    /// Search using hybrid vector + full-text search
    #[command(after_long_help = "EXAMPLES:\n  \
        # Hybrid search combining KNN + FTS5 BM25 with RRF\n  \
        sqlite-graphrag hybrid-search \"agent memory architecture\"\n\n  \
        # Custom weights for vector vs full-text components\n  \
        sqlite-graphrag hybrid-search \"agent\" --weight-vec 0.7 --weight-fts 0.3")]
    HybridSearch(hybrid_search::HybridSearchArgs),
    /// Show database health
    Health(health::HealthArgs),
    /// Apply pending schema migrations
    Migrate(migrate::MigrateArgs),
    /// Resolve namespace precedence for the current invocation
    NamespaceDetect(namespace_detect::NamespaceDetectArgs),
    /// Run PRAGMA optimize on the database
    Optimize(optimize::OptimizeArgs),
    /// Show database statistics
    Stats(stats::StatsArgs),
    /// Create a checkpointed copy safe for file sync
    SyncSafeCopy(sync_safe_copy::SyncSafeCopyArgs),
    /// Run VACUUM after checkpointing the WAL
    Vacuum(vacuum::VacuumArgs),
    /// Create an explicit relationship between two entities
    Link(link::LinkArgs),
    /// Remove a specific relationship between two entities
    Unlink(unlink::UnlinkArgs),
    /// List memories connected via the entity graph
    Related(related::RelatedArgs),
    /// Export a graph snapshot in json, dot or mermaid
    Graph(graph_export::GraphArgs),
    /// Bulk-delete all relationships of a given type (e.g. mentions)
    PruneRelations(prune_relations::PruneRelationsArgs),
    /// Remove entities that have no memories and no relationships
    CleanupOrphans(cleanup_orphans::CleanupOrphansArgs),
    /// Manage cached resources (embedding models, etc.)
    Cache(cache::CacheArgs),
    // Internal subcommand: invoked as `__debug_schema` (explicit `name`) and
    // kept out of the help listing by `hide = true`. It deliberately has no
    // `///` description.
    #[command(name = "__debug_schema", hide = true)]
    DebugSchema(debug_schema::DebugSchemaArgs),
}
343
/// Category label for a memory.
// `clap::ValueEnum` makes each variant an accepted command-line value; with
// clap's default naming these are the lowercase variant names, the same
// spellings that `MemoryType::as_str` returns.
// NOTE(review): presumably the value type behind `remember --type` — the
// consuming argument struct is not visible in this file.
#[derive(Copy, Clone, Debug, Default, clap::ValueEnum)]
pub enum MemoryType {
    User,
    Feedback,
    Project,
    Reference,
    Decision,
    Incident,
    Skill,
    // `#[default]` makes `Document` the value produced by `MemoryType::default()`.
    #[default]
    Document,
    Note,
}
357
#[cfg(test)]
mod heavy_concurrency_tests {
    use super::*;

    /// Parses `argv` into a [`Cli`], panicking with `context` when clap rejects it.
    #[track_caller]
    fn parse(argv: &[&str], context: &str) -> Cli {
        Cli::try_parse_from(argv.iter().copied()).expect(context)
    }

    #[test]
    fn command_heavy_detects_init_and_embeddings() {
        // Each entry is a full command line plus the panic message used if it fails to parse.
        let heavy_invocations: [(&[&str], &str); 4] = [
            (&["sqlite-graphrag", "init"], "parse init"),
            (
                &[
                    "sqlite-graphrag",
                    "remember",
                    "--name",
                    "test-memory",
                    "--type",
                    "project",
                    "--description",
                    "desc",
                ],
                "parse remember",
            ),
            (&["sqlite-graphrag", "recall", "query"], "parse recall"),
            (
                &["sqlite-graphrag", "hybrid-search", "query"],
                "parse hybrid",
            ),
        ];

        for (argv, context) in heavy_invocations {
            let cli = parse(argv, context);
            assert!(cli.command.is_embedding_heavy());
        }
    }

    #[test]
    fn command_light_does_not_mark_stats() {
        let cli = parse(&["sqlite-graphrag", "stats"], "parse stats");
        assert!(!cli.command.is_embedding_heavy());
    }
}
395
396impl MemoryType {
397    pub fn as_str(&self) -> &'static str {
398        match self {
399            Self::User => "user",
400            Self::Feedback => "feedback",
401            Self::Project => "project",
402            Self::Reference => "reference",
403            Self::Decision => "decision",
404            Self::Incident => "incident",
405            Self::Skill => "skill",
406            Self::Document => "document",
407            Self::Note => "note",
408        }
409    }
410}