// rlm_rs/cli/parser.rs

//! Command-line argument parsing.
//!
//! Defines the CLI structure using clap derive macros.

use clap::{Parser, Subcommand};
use std::path::PathBuf;

use crate::chunking::{DEFAULT_CHUNK_SIZE, DEFAULT_OVERLAP};

10/// RLM-RS: Recursive Language Model REPL for Claude Code.
11///
12/// A CLI tool for handling large context files via chunking and
13/// recursive sub-LLM calls.
14#[derive(Parser, Debug)]
15#[command(name = "rlm-cli")]
16#[command(version, about, long_about = None)]
17#[command(propagate_version = true)]
18pub struct Cli {
19    /// Path to the RLM database file.
20    ///
21    /// Defaults to `.rlm/rlm-state.db` in the current directory.
22    #[arg(short, long, env = "RLM_DB_PATH")]
23    pub db_path: Option<PathBuf>,
24
25    /// Enable verbose output.
26    #[arg(short, long, global = true)]
27    pub verbose: bool,
28
29    /// Output format (text, json).
30    #[arg(long, default_value = "text", global = true)]
31    pub format: String,
32
33    /// The subcommand to execute.
34    #[command(subcommand)]
35    pub command: Commands,
36}
37
38/// Available CLI commands.
39#[derive(Subcommand, Debug)]
40pub enum Commands {
41    /// Initialize the RLM database.
42    ///
43    /// Creates the database file and schema if they don't exist.
44    #[command(after_help = r#"Examples:
45  rlm-cli init                    # Initialize in current directory
46  rlm-cli init --force            # Re-initialize (destroys existing data)
47  rlm-cli --db-path ./my.db init  # Initialize with custom path
48"#)]
49    Init {
50        /// Force re-initialization (destroys existing data).
51        #[arg(short, long)]
52        force: bool,
53    },
54
55    /// Show current RLM state status.
56    Status,
57
58    /// Reset RLM state (delete all data).
59    Reset {
60        /// Skip confirmation prompt.
61        #[arg(short = 'y', long)]
62        yes: bool,
63    },
64
65    /// Load a context file into a buffer.
66    #[command(after_help = r#"Examples:
67  rlm-cli load large_file.txt                      # Load with semantic chunking
68  rlm-cli load src/main.rs --name main-source      # Load with custom name
69  rlm-cli load src/lib.rs --chunker code           # Code-aware chunking
70  rlm-cli load doc.md --chunker fixed --chunk-size 2000
71  rlm-cli load big.log --chunker parallel          # Parallel for large files
72  rlm-cli --format json load file.txt | jq '.buffer_id'
73"#)]
74    Load {
75        /// Path to the context file.
76        file: PathBuf,
77
78        /// Optional name for the buffer.
79        #[arg(short, long)]
80        name: Option<String>,
81
82        /// Chunking strategy (fixed, semantic, code, parallel).
83        #[arg(short, long, default_value = "semantic")]
84        chunker: String,
85
86        /// Chunk size in characters.
87        #[arg(long, default_value_t = DEFAULT_CHUNK_SIZE)]
88        chunk_size: usize,
89
90        /// Overlap between chunks in characters.
91        #[arg(long, default_value_t = DEFAULT_OVERLAP)]
92        overlap: usize,
93    },
94
95    /// List all buffers.
96    #[command(name = "list", alias = "ls")]
97    #[command(after_help = r#"Examples:
98  rlm-cli list                            # List all buffers
99  rlm-cli ls                              # Alias for list
100  rlm-cli --format json list | jq '.[].name'
101"#)]
102    ListBuffers,
103
104    /// Show buffer details.
105    #[command(name = "show")]
106    #[command(after_help = r#"Examples:
107  rlm-cli show main-source                # Show buffer by name
108  rlm-cli show 1                          # Show buffer by ID
109  rlm-cli show 1 --chunks                 # Include chunk list
110  rlm-cli --format json show 1            # JSON output
111"#)]
112    ShowBuffer {
113        /// Buffer ID or name.
114        buffer: String,
115
116        /// Show chunks as well.
117        #[arg(short, long)]
118        chunks: bool,
119    },
120
121    /// Delete a buffer.
122    #[command(name = "delete", alias = "rm")]
123    DeleteBuffer {
124        /// Buffer ID or name.
125        buffer: String,
126
127        /// Skip confirmation prompt.
128        #[arg(short = 'y', long)]
129        yes: bool,
130    },
131
132    /// Peek at buffer content.
133    Peek {
134        /// Buffer ID or name.
135        buffer: String,
136
137        /// Start offset in bytes.
138        #[arg(long, default_value = "0")]
139        start: usize,
140
141        /// End offset in bytes (default: start + 3000).
142        #[arg(long)]
143        end: Option<usize>,
144    },
145
146    /// Search buffer content with regex.
147    Grep {
148        /// Buffer ID or name.
149        buffer: String,
150
151        /// Search pattern (regex).
152        pattern: String,
153
154        /// Maximum number of matches.
155        #[arg(short = 'n', long, default_value = "20")]
156        max_matches: usize,
157
158        /// Context window size around matches.
159        #[arg(short, long, default_value = "120")]
160        window: usize,
161
162        /// Case-insensitive search.
163        #[arg(short, long)]
164        ignore_case: bool,
165    },
166
167    /// Get chunk indices for a buffer.
168    ChunkIndices {
169        /// Buffer ID or name.
170        buffer: String,
171
172        /// Chunk size in characters.
173        #[arg(long, default_value_t = DEFAULT_CHUNK_SIZE)]
174        chunk_size: usize,
175
176        /// Overlap between chunks in characters.
177        #[arg(long, default_value_t = DEFAULT_OVERLAP)]
178        overlap: usize,
179    },
180
181    /// Write chunks to files.
182    WriteChunks {
183        /// Buffer ID or name.
184        buffer: String,
185
186        /// Output directory.
187        #[arg(short, long, default_value = ".rlm/chunks")]
188        out_dir: PathBuf,
189
190        /// Chunk size in characters.
191        #[arg(long, default_value_t = DEFAULT_CHUNK_SIZE)]
192        chunk_size: usize,
193
194        /// Overlap between chunks in characters.
195        #[arg(long, default_value_t = DEFAULT_OVERLAP)]
196        overlap: usize,
197
198        /// Filename prefix.
199        #[arg(long, default_value = "chunk")]
200        prefix: String,
201    },
202
203    /// Add text to a buffer (intermediate results).
204    AddBuffer {
205        /// Buffer name.
206        name: String,
207
208        /// Content to add (reads from stdin if not provided).
209        content: Option<String>,
210    },
211
212    /// Update an existing buffer with new content.
213    ///
214    /// Re-chunks the buffer and incrementally updates embeddings.
215    #[command(after_help = r#"Examples:
216  cat updated.txt | rlm-cli update main-source   # Update from stdin
217  rlm-cli update my-buffer "new content"         # Update with inline content
218  rlm-cli update my-buffer --embed               # Update and generate embeddings
219  rlm-cli update my-buffer --chunk-size 500      # Custom chunk size"#)]
220    #[command(alias = "update")]
221    UpdateBuffer {
222        /// Buffer ID or name.
223        buffer: String,
224
225        /// New content (reads from stdin if not provided).
226        content: Option<String>,
227
228        /// Automatically embed new chunks after update.
229        #[arg(short, long)]
230        embed: bool,
231
232        /// Chunking strategy (semantic, fixed, parallel).
233        #[arg(long, default_value = "semantic")]
234        strategy: String,
235
236        /// Chunk size in characters.
237        #[arg(long, default_value_t = DEFAULT_CHUNK_SIZE)]
238        chunk_size: usize,
239
240        /// Chunk overlap in characters.
241        #[arg(long, default_value_t = DEFAULT_OVERLAP)]
242        overlap: usize,
243    },
244
245    /// Export all buffers to a file.
246    ExportBuffers {
247        /// Output file path (stdout if not specified).
248        #[arg(short, long)]
249        output: Option<PathBuf>,
250
251        /// Pretty-print if JSON format.
252        #[arg(short, long)]
253        pretty: bool,
254    },
255
256    /// Set or get context variables.
257    #[command(name = "var")]
258    Variable {
259        /// Variable name.
260        name: String,
261
262        /// Value to set (omit to get current value).
263        value: Option<String>,
264
265        /// Delete the variable.
266        #[arg(short, long)]
267        delete: bool,
268    },
269
270    /// Set or get global variables.
271    Global {
272        /// Variable name.
273        name: String,
274
275        /// Value to set (omit to get current value).
276        value: Option<String>,
277
278        /// Delete the variable.
279        #[arg(short, long)]
280        delete: bool,
281    },
282
283    /// Search chunks using hybrid semantic + BM25 search.
284    ///
285    /// Returns chunk IDs and scores. Use `chunk get <id>` to retrieve content.
286    #[command(after_help = r#"Examples:
287  rlm-cli search "error handling"                  # Hybrid search (default)
288  rlm-cli search "authentication" -k 5             # Top 5 results
289  rlm-cli search "config" --mode bm25              # BM25 keyword search only
290  rlm-cli search "API" --mode semantic             # Semantic search only
291  rlm-cli search "bug fix" --buffer main-source    # Filter by buffer
292  rlm-cli search "auth" --preview                  # Include content preview
293  rlm-cli --format json search "test" | jq '.results[].chunk_id'
294"#)]
295    Search {
296        /// Search query text.
297        query: String,
298
299        /// Maximum number of results.
300        #[arg(short = 'k', long, default_value = "10")]
301        top_k: usize,
302
303        /// Minimum similarity threshold (0.0-1.0).
304        #[arg(short, long, default_value = "0.3")]
305        threshold: f32,
306
307        /// Search mode: hybrid, semantic, bm25.
308        #[arg(short, long, default_value = "hybrid")]
309        mode: String,
310
311        /// RRF k parameter for rank fusion.
312        #[arg(long, default_value = "60")]
313        rrf_k: u32,
314
315        /// Filter by buffer ID or name.
316        #[arg(short, long)]
317        buffer: Option<String>,
318
319        /// Include content preview in results.
320        #[arg(short, long)]
321        preview: bool,
322
323        /// Preview length in characters.
324        #[arg(long, default_value = "150")]
325        preview_len: usize,
326    },
327
328    /// Aggregate findings from analyst subagents.
329    ///
330    /// Reads JSON findings from stdin or a buffer, groups by relevance,
331    /// deduplicates, and outputs a synthesizer-ready report.
332    #[command(after_help = r#"Examples:
333  cat findings.json | rlm-cli aggregate           # Aggregate from stdin
334  rlm-cli aggregate --buffer findings             # Read from buffer
335  rlm-cli aggregate --min-relevance medium        # Filter low relevance
336  rlm-cli --format json aggregate | jq '.findings'
337
338Input format (JSON array of analyst findings):
339[
340  {"chunk_id": 12, "relevance": "high", "findings": ["..."], "summary": "..."},
341  {"chunk_id": 27, "relevance": "medium", "findings": ["..."], "summary": "..."}
342]"#)]
343    Aggregate {
344        /// Read findings from a buffer instead of stdin.
345        #[arg(short, long)]
346        buffer: Option<String>,
347
348        /// Minimum relevance to include (none, low, medium, high).
349        #[arg(long, default_value = "low")]
350        min_relevance: String,
351
352        /// Group findings by this field (`chunk_id`, `relevance`, `none`).
353        #[arg(long, default_value = "relevance")]
354        group_by: String,
355
356        /// Sort findings by this field (`relevance`, `chunk_id`, `findings_count`).
357        #[arg(long, default_value = "relevance")]
358        sort_by: String,
359
360        /// Store aggregated results in a new buffer with this name.
361        #[arg(short, long)]
362        output_buffer: Option<String>,
363    },
364
365    /// Dispatch chunks for parallel subagent processing.
366    ///
367    /// Splits chunks into batches suitable for parallel subagent analysis.
368    /// Returns batch assignments with chunk IDs and metadata.
369    #[command(after_help = r#"Examples:
370  rlm-cli dispatch my-buffer                     # Dispatch all chunks
371  rlm-cli dispatch my-buffer --batch-size 5      # 5 chunks per batch
372  rlm-cli dispatch my-buffer --workers 4         # Split into 4 batches
373  rlm-cli dispatch my-buffer --query "error"     # Only relevant chunks
374  rlm-cli --format json dispatch my-buffer       # JSON for orchestrator"#)]
375    Dispatch {
376        /// Buffer ID or name.
377        buffer: String,
378
379        /// Number of chunks per batch (overrides --workers).
380        #[arg(long, default_value = "10")]
381        batch_size: usize,
382
383        /// Number of worker batches to create (alternative to --batch-size).
384        #[arg(long)]
385        workers: Option<usize>,
386
387        /// Filter to chunks matching this search query.
388        #[arg(short, long)]
389        query: Option<String>,
390
391        /// Search mode for query filtering (hybrid, semantic, bm25).
392        #[arg(long, default_value = "hybrid")]
393        mode: String,
394
395        /// Minimum similarity threshold for query filtering.
396        #[arg(long, default_value = "0.3")]
397        threshold: f32,
398    },
399
400    /// Chunk operations (get, list, embed).
401    #[command(subcommand)]
402    Chunk(ChunkCommands),
403}
404
405/// Chunk subcommands for pass-by-reference retrieval.
406#[derive(Subcommand, Debug)]
407pub enum ChunkCommands {
408    /// Get a chunk by ID.
409    ///
410    /// Returns the chunk content and metadata. This is the primary
411    /// pass-by-reference retrieval mechanism for subagents.
412    #[command(after_help = r#"Examples:
413  rlm-cli chunk get 42                    # Get chunk content
414  rlm-cli chunk get 42 --metadata         # Include byte range, token count
415  rlm-cli --format json chunk get 42      # JSON output for programmatic use
416"#)]
417    Get {
418        /// Chunk ID.
419        id: i64,
420
421        /// Include metadata in output.
422        #[arg(short, long)]
423        metadata: bool,
424    },
425
426    /// List chunks for a buffer.
427    #[command(after_help = r#"Examples:
428  rlm-cli chunk list main-source          # List chunk IDs
429  rlm-cli chunk list 1 --preview          # Show content preview
430  rlm-cli --format json chunk list 1 | jq '.[].id'
431"#)]
432    List {
433        /// Buffer ID or name.
434        buffer: String,
435
436        /// Show content preview.
437        #[arg(short, long)]
438        preview: bool,
439
440        /// Preview length in characters.
441        #[arg(long, default_value = "100")]
442        preview_len: usize,
443    },
444
445    /// Generate embeddings for buffer chunks.
446    #[command(after_help = r#"Examples:
447  rlm-cli chunk embed main-source         # Generate embeddings
448  rlm-cli chunk embed 1 --force           # Re-embed existing chunks
449"#)]
450    Embed {
451        /// Buffer ID or name.
452        buffer: String,
453
454        /// Re-embed even if already embedded.
455        #[arg(short, long)]
456        force: bool,
457    },
458
459    /// Show embedding status for buffers.
460    Status,
461}
462
463impl Cli {
464    /// Returns the database path, using the default if not specified.
465    #[must_use]
466    pub fn get_db_path(&self) -> PathBuf {
467        self.db_path
468            .clone()
469            .unwrap_or_else(|| PathBuf::from(crate::storage::DEFAULT_DB_PATH))
470    }
471}
472
#[cfg(test)]
mod tests {
    use super::*;
    use clap::CommandFactory;

    /// Runs clap's built-in consistency checks over the whole command tree
    /// (they panic on problems in the derive configuration).
    #[test]
    fn test_cli_parse() {
        // Test that CLI can be created
        Cli::command().debug_assert();
    }

    /// With no `db_path` set, `get_db_path` falls back to the storage default.
    #[test]
    fn test_default_db_path() {
        // Built directly rather than parsed, so neither argv nor the
        // `RLM_DB_PATH` environment variable can influence the result.
        let cli = Cli {
            db_path: None,
            verbose: false,
            format: "text".to_string(),
            command: Commands::Status,
        };
        assert_eq!(
            cli.get_db_path(),
            PathBuf::from(crate::storage::DEFAULT_DB_PATH)
        );
    }

    /// An explicitly provided `db_path` is returned unchanged.
    #[test]
    fn test_custom_db_path() {
        let cli = Cli {
            db_path: Some(PathBuf::from("/custom/path.db")),
            verbose: false,
            format: "text".to_string(),
            command: Commands::Status,
        };
        assert_eq!(cli.get_db_path(), PathBuf::from("/custom/path.db"));
    }
}