Skip to main content

sqlite_graphrag/
cli.rs

1//! CLI argument structs and command surface (clap-based).
2//!
3//! Defines `Cli` and all subcommand enums; contains no business logic.
4
5use crate::commands::*;
6use crate::i18n::{current, Language};
7use clap::{Parser, Subcommand};
8
9/// Returns the maximum simultaneous invocations allowed by the CPU heuristic.
/// Computes the upper bound used when validating `--max-concurrency`.
///
/// The bound is twice the value reported by
/// [`std::thread::available_parallelism`]. When that query fails, the
/// fallback bound is `8`.
fn max_concurrency_ceiling() -> usize {
    match std::thread::available_parallelism() {
        Ok(parallelism) => parallelism.get() * 2,
        // The platform could not report its parallelism; use the fixed fallback.
        Err(_) => 8,
    }
}
15
16#[derive(Copy, Clone, Debug, clap::ValueEnum)]
17pub enum RelationKind {
18    AppliesTo,
19    Uses,
20    DependsOn,
21    Causes,
22    Fixes,
23    Contradicts,
24    Supports,
25    Follows,
26    Related,
27    Mentions,
28    Replaces,
29    TrackedIn,
30}
31
32impl RelationKind {
33    pub fn as_str(&self) -> &'static str {
34        match self {
35            Self::AppliesTo => "applies_to",
36            Self::Uses => "uses",
37            Self::DependsOn => "depends_on",
38            Self::Causes => "causes",
39            Self::Fixes => "fixes",
40            Self::Contradicts => "contradicts",
41            Self::Supports => "supports",
42            Self::Follows => "follows",
43            Self::Related => "related",
44            Self::Mentions => "mentions",
45            Self::Replaces => "replaces",
46            Self::TrackedIn => "tracked_in",
47        }
48    }
49}
50
51#[derive(Copy, Clone, Debug, clap::ValueEnum)]
52pub enum GraphExportFormat {
53    Json,
54    Dot,
55    Mermaid,
56}
57
58#[derive(Parser)]
59#[command(name = "sqlite-graphrag")]
60#[command(version)]
61#[command(about = "Local GraphRAG memory for LLMs in a single SQLite file")]
62#[command(arg_required_else_help = true)]
63pub struct Cli {
64    /// Maximum number of simultaneous CLI invocations allowed (default: 4).
65    ///
66    /// Caps the counting semaphore used for CLI concurrency slots. The value must
67    /// stay within [1, 2×nCPUs]. Values above the ceiling are rejected with exit 2.
68    #[arg(long, global = true, value_name = "N")]
69    pub max_concurrency: Option<usize>,
70
71    /// Wait up to SECONDS for a free concurrency slot before giving up (exit 75).
72    ///
73    /// Useful in retrying agent pipelines: the process polls every 500 ms until a
74    /// slot opens or the timeout expires. Default: 300s (5 minutes).
75    #[arg(long, global = true, value_name = "SECONDS")]
76    pub wait_lock: Option<u64>,
77
78    /// Skip the available-memory check before loading the model.
79    ///
80    /// Exclusive use in automated tests where real allocation does not occur.
81    #[arg(long, global = true, hide = true, default_value_t = false)]
82    pub skip_memory_guard: bool,
83
84    /// Language for human-facing stderr messages. Accepts `en` or `pt`.
85    ///
86    /// Without the flag, detection falls back to `SQLITE_GRAPHRAG_LANG` and then
87    /// `LC_ALL`/`LANG`. JSON stdout stays deterministic and identical across
88    /// languages; only human-facing strings are affected.
89    #[arg(long, global = true, value_enum, value_name = "LANG")]
90    pub lang: Option<crate::i18n::Language>,
91
92    /// Time zone for `*_iso` fields in JSON output (for example `America/Sao_Paulo`).
93    ///
94    /// Accepts any IANA time zone name. Without the flag, it falls back to
95    /// `SQLITE_GRAPHRAG_DISPLAY_TZ`; if unset, UTC is used. Integer epoch fields
96    /// are not affected.
97    #[arg(long, global = true, value_name = "IANA")]
98    pub tz: Option<chrono_tz::Tz>,
99
100    /// Increase logging verbosity (-v=info, -vv=debug, -vvv=trace).
101    ///
102    /// Overrides `SQLITE_GRAPHRAG_LOG_LEVEL` env var when present. Logs are emitted
103    /// to stderr; JSON stdout is unaffected.
104    #[arg(short = 'v', long, global = true, action = clap::ArgAction::Count)]
105    pub verbose: u8,
106
107    #[command(subcommand)]
108    pub command: Commands,
109}
110
#[cfg(test)]
mod json_only_format_tests {
    use super::Cli;
    use clap::Parser;

    /// Builds `base` followed by `--format <value>`.
    fn with_format<'a>(base: &[&'a str], value: &'a str) -> Vec<&'a str> {
        let mut argv = base.to_vec();
        argv.push("--format");
        argv.push(value);
        argv
    }

    /// Reports whether the CLI parser accepts `argv`.
    fn parses(argv: &[&str]) -> bool {
        Cli::try_parse_from(argv.iter().copied()).is_ok()
    }

    /// Asserts that `base` parses with `--format json` and fails to parse with
    /// `--format <rejected>`.
    fn assert_json_only(base: &[&str], rejected: &str) {
        assert!(parses(&with_format(base, "json")));
        assert!(!parses(&with_format(base, rejected)));
    }

    #[test]
    fn restore_accepts_only_format_json() {
        assert_json_only(
            &[
                "sqlite-graphrag",
                "restore",
                "--name",
                "mem",
                "--version",
                "1",
            ],
            "text",
        );
    }

    #[test]
    fn hybrid_search_accepts_only_format_json() {
        assert_json_only(&["sqlite-graphrag", "hybrid-search", "query"], "markdown");
    }

    #[test]
    fn remember_recall_rename_vacuum_json_only() {
        assert_json_only(
            &[
                "sqlite-graphrag",
                "remember",
                "--name",
                "mem",
                "--type",
                "project",
                "--description",
                "desc",
            ],
            "text",
        );

        assert_json_only(&["sqlite-graphrag", "recall", "query"], "text");

        assert_json_only(
            &[
                "sqlite-graphrag",
                "rename",
                "--name",
                "old",
                "--new-name",
                "new",
            ],
            "markdown",
        );

        assert_json_only(&["sqlite-graphrag", "vacuum"], "text");
    }
}
229
230impl Cli {
231    /// Validates concurrency flags and returns a localised descriptive error if invalid.
232    ///
233    /// Requires that `crate::i18n::init()` has already been called (happens before this
234    /// function in the `main` flow). In English it emits EN messages; in Portuguese it emits PT.
235    pub fn validate_flags(&self) -> Result<(), String> {
236        if let Some(n) = self.max_concurrency {
237            if n == 0 {
238                return Err(match current() {
239                    Language::English => "--max-concurrency must be >= 1".to_string(),
240                    Language::Portuguese => "--max-concurrency deve ser >= 1".to_string(),
241                });
242            }
243            let teto = max_concurrency_ceiling();
244            if n > teto {
245                return Err(match current() {
246                    Language::English => format!(
247                        "--max-concurrency {n} exceeds the ceiling of {teto} (2×nCPUs) on this system"
248                    ),
249                    Language::Portuguese => format!(
250                        "--max-concurrency {n} excede o teto de {teto} (2×nCPUs) neste sistema"
251                    ),
252                });
253            }
254        }
255        Ok(())
256    }
257}
258
259impl Commands {
260    /// Retorna true para subcomandos que carregam o modelo ONNX localmente.
261    pub fn is_embedding_heavy(&self) -> bool {
262        matches!(
263            self,
264            Self::Init(_) | Self::Remember(_) | Self::Recall(_) | Self::HybridSearch(_)
265        )
266    }
267
268    pub fn uses_cli_slot(&self) -> bool {
269        !matches!(self, Self::Daemon(_))
270    }
271}
272
// The full subcommand surface of the CLI.
//
// NOTE: every `///` comment and every `after_long_help` string in this enum is
// rendered by clap in `--help` output, so that text is user-facing and must be
// edited with the same care as any other runtime string. Maintainer notes here
// use `//` so they never leak into help output.
//
// Each variant wraps an argument struct named `<module>::<Name>Args`; those
// modules are presumably the ones brought into scope by the
// `use crate::commands::*` glob import.
#[derive(Subcommand)]
pub enum Commands {
    /// Initialize database and download embedding model
    #[command(after_long_help = "EXAMPLES:\n  \
        # Initialize in current directory (default behavior)\n  \
        sqlite-graphrag init\n\n  \
        # Initialize at a specific path\n  \
        sqlite-graphrag init --db /path/to/graphrag.sqlite\n\n  \
        # Initialize using SQLITE_GRAPHRAG_HOME env var\n  \
        SQLITE_GRAPHRAG_HOME=/data sqlite-graphrag init")]
    Init(init::InitArgs),
    /// Run or control the persistent embedding daemon
    Daemon(daemon::DaemonArgs),
    /// Save a memory with optional entity graph
    #[command(after_long_help = "EXAMPLES:\n  \
        # Inline body\n  \
        sqlite-graphrag remember --name onboarding --type user --description \"intro\" --body \"hello\"\n\n  \
        # Body from file\n  \
        sqlite-graphrag remember --name doc1 --type document --description \"...\" --body-file ./README.md\n\n  \
        # Body from stdin (pipe)\n  \
        cat README.md | sqlite-graphrag remember --name doc1 --type document --description \"...\" --body-stdin\n\n  \
        # Skip BERT entity extraction (faster)\n  \
        sqlite-graphrag remember --name quick --type note --description \"...\" --body \"...\" --skip-extraction")]
    Remember(remember::RememberArgs),
    /// Bulk-ingest every file under a directory as separate memories (NDJSON output)
    Ingest(ingest::IngestArgs),
    /// Search memories semantically
    #[command(after_long_help = "EXAMPLES:\n  \
        # Top 10 semantic matches (default)\n  \
        sqlite-graphrag recall \"agent memory\"\n\n  \
        # Top 3 only\n  \
        sqlite-graphrag recall \"agent memory\" -k 3\n\n  \
        # Search across all namespaces\n  \
        sqlite-graphrag recall \"agent memory\" --all-namespaces\n\n  \
        # Disable graph traversal (vector-only)\n  \
        sqlite-graphrag recall \"agent memory\" --no-graph")]
    Recall(recall::RecallArgs),
    /// Read a memory by exact name
    Read(read::ReadArgs),
    /// List memories with filters
    List(list::ListArgs),
    /// Soft-delete a memory
    Forget(forget::ForgetArgs),
    /// Permanently delete soft-deleted memories
    Purge(purge::PurgeArgs),
    /// Rename a memory preserving history
    Rename(rename::RenameArgs),
    /// Edit a memory's body or description
    Edit(edit::EditArgs),
    /// List all versions of a memory
    History(history::HistoryArgs),
    /// Restore a memory to a previous version
    Restore(restore::RestoreArgs),
    /// Search using hybrid vector + full-text search
    #[command(after_long_help = "EXAMPLES:\n  \
        # Hybrid search combining KNN + FTS5 BM25 with RRF\n  \
        sqlite-graphrag hybrid-search \"agent memory architecture\"\n\n  \
        # Custom weights for vector vs full-text components\n  \
        sqlite-graphrag hybrid-search \"agent\" --weight-vec 0.7 --weight-fts 0.3")]
    HybridSearch(hybrid_search::HybridSearchArgs),
    /// Show database health
    Health(health::HealthArgs),
    /// Apply pending schema migrations
    Migrate(migrate::MigrateArgs),
    /// Resolve namespace precedence for the current invocation
    NamespaceDetect(namespace_detect::NamespaceDetectArgs),
    /// Run PRAGMA optimize on the database
    Optimize(optimize::OptimizeArgs),
    /// Show database statistics
    Stats(stats::StatsArgs),
    /// Create a checkpointed copy safe for file sync
    SyncSafeCopy(sync_safe_copy::SyncSafeCopyArgs),
    /// Run VACUUM after checkpointing the WAL
    Vacuum(vacuum::VacuumArgs),
    /// Create an explicit relationship between two entities
    Link(link::LinkArgs),
    /// Remove a specific relationship between two entities
    Unlink(unlink::UnlinkArgs),
    /// List memories connected via the entity graph
    Related(related::RelatedArgs),
    /// Export a graph snapshot in json, dot or mermaid
    Graph(graph_export::GraphArgs),
    /// Remove entities that have no memories and no relationships
    CleanupOrphans(cleanup_orphans::CleanupOrphansArgs),
    /// Manage cached resources (embedding models, etc.)
    Cache(cache::CacheArgs),
    // Hidden from help output (`hide = true`) and invoked under the explicit
    // name `__debug_schema` instead of a name derived from the variant.
    #[command(name = "__debug_schema", hide = true)]
    DebugSchema(debug_schema::DebugSchemaArgs),
}
362
363#[derive(Copy, Clone, Debug, clap::ValueEnum)]
364pub enum MemoryType {
365    User,
366    Feedback,
367    Project,
368    Reference,
369    Decision,
370    Incident,
371    Skill,
372    Document,
373    Note,
374}
375
#[cfg(test)]
mod heavy_concurrency_tests {
    use super::*;

    /// Parses `argv` into a `Cli`, panicking with `label` when parsing fails.
    fn parse_cli(argv: &[&str], label: &str) -> Cli {
        Cli::try_parse_from(argv.iter().copied()).expect(label)
    }

    #[test]
    fn command_heavy_detects_init_and_embeddings() {
        let init = parse_cli(&["sqlite-graphrag", "init"], "parse init");
        assert!(init.command.is_embedding_heavy());

        let remember = parse_cli(
            &[
                "sqlite-graphrag",
                "remember",
                "--name",
                "test-memory",
                "--type",
                "project",
                "--description",
                "desc",
            ],
            "parse remember",
        );
        assert!(remember.command.is_embedding_heavy());

        let recall = parse_cli(&["sqlite-graphrag", "recall", "query"], "parse recall");
        assert!(recall.command.is_embedding_heavy());

        let hybrid = parse_cli(
            &["sqlite-graphrag", "hybrid-search", "query"],
            "parse hybrid",
        );
        assert!(hybrid.command.is_embedding_heavy());
    }

    #[test]
    fn command_light_does_not_mark_stats() {
        let stats = parse_cli(&["sqlite-graphrag", "stats"], "parse stats");
        assert!(!stats.command.is_embedding_heavy());
    }
}
413
414impl MemoryType {
415    pub fn as_str(&self) -> &'static str {
416        match self {
417            Self::User => "user",
418            Self::Feedback => "feedback",
419            Self::Project => "project",
420            Self::Reference => "reference",
421            Self::Decision => "decision",
422            Self::Incident => "incident",
423            Self::Skill => "skill",
424            Self::Document => "document",
425            Self::Note => "note",
426        }
427    }
428}