use clap::{Parser, Subcommand};
use std::path::PathBuf;
use crate::chunking::{DEFAULT_CHUNK_SIZE, DEFAULT_OVERLAP};
/// Command-line interface for `rlm-cli`: options parsed ahead of the subcommand plus the subcommand itself.
#[derive(Parser, Debug)]
#[command(name = "rlm-cli")]
#[command(version, about, long_about = None)]
#[command(propagate_version = true)]
pub struct Cli {
    // Not marked `global`, so this option has to appear before the subcommand
    // (e.g. `rlm-cli --db-path ./my.db init`). It can also be supplied through
    // the `RLM_DB_PATH` environment variable.
    /// Path to the database file (a built-in default path is used when omitted)
    #[arg(short, long, env = "RLM_DB_PATH")]
    pub db_path: Option<PathBuf>,

    /// Enable verbose output
    #[arg(short, long, global = true)]
    pub verbose: bool,

    // Kept as a free-form string; the accepted values are checked (if at all)
    // by whoever consumes this field, not by the parser.
    /// Output format, e.g. `text` or `json`
    #[arg(long, default_value = "text", global = true)]
    pub format: String,

    // The subcommand selected on the command line.
    #[command(subcommand)]
    pub command: Commands,
}
/// Top-level subcommands of `rlm-cli`.
#[derive(Subcommand, Debug)]
pub enum Commands {
    /// Initialize the database
    #[command(after_help = r#"Examples:
rlm-cli init # Initialize in current directory
rlm-cli init --force # Re-initialize (destroys existing data)
rlm-cli --db-path ./my.db init # Initialize with custom path
"#)]
    Init {
        /// Re-initialize even if already initialized (destroys existing data)
        #[arg(short, long)]
        force: bool,
    },

    /// Show current status
    Status,

    // NOTE(review): the exact scope of the reset is decided by the command
    // handler, which is not visible in this file.
    /// Reset stored state
    Reset {
        // NOTE(review): presumably bypasses an interactive confirmation —
        // confirm against the handler.
        /// Assume "yes" instead of asking for confirmation
        #[arg(short = 'y', long)]
        yes: bool,
    },

    /// Load a file into a buffer and split it into chunks
    #[command(after_help = r#"Examples:
rlm-cli load large_file.txt # Load with semantic chunking
rlm-cli load src/main.rs --name main-source # Load with custom name
rlm-cli load src/lib.rs --chunker code # Code-aware chunking
rlm-cli load doc.md --chunker fixed --chunk-size 2000
rlm-cli load big.log --chunker parallel # Parallel for large files
rlm-cli --format json load file.txt | jq '.buffer_id'
"#)]
    Load {
        /// Path of the file to load
        file: PathBuf,

        /// Custom name for the loaded buffer
        #[arg(short, long)]
        name: Option<String>,

        // Free-form string: the set of valid chunkers is not enforced here.
        /// Chunking strategy (the examples use `semantic`, `code`, `fixed` and `parallel`)
        #[arg(short, long, default_value = "semantic")]
        chunker: String,

        // NOTE(review): the unit (bytes, characters, tokens) is defined by the
        // chunking module, not visible here.
        /// Chunk size
        #[arg(long, default_value_t = DEFAULT_CHUNK_SIZE)]
        chunk_size: usize,

        /// Overlap between consecutive chunks
        #[arg(long, default_value_t = DEFAULT_OVERLAP)]
        overlap: usize,
    },

    /// List all buffers
    #[command(name = "list", alias = "ls")]
    #[command(after_help = r#"Examples:
rlm-cli list # List all buffers
rlm-cli ls # Alias for list
rlm-cli --format json list | jq '.[].name'
"#)]
    ListBuffers,

    /// Show a buffer, looked up by name or ID
    #[command(name = "show")]
    #[command(after_help = r#"Examples:
rlm-cli show main-source # Show buffer by name
rlm-cli show 1 # Show buffer by ID
rlm-cli show 1 --chunks # Include chunk list
rlm-cli --format json show 1 # JSON output
"#)]
    ShowBuffer {
        /// Buffer name or ID
        buffer: String,

        /// Include the chunk list
        #[arg(short, long)]
        chunks: bool,
    },

    /// Delete a buffer
    #[command(name = "delete", alias = "rm")]
    DeleteBuffer {
        /// Buffer to delete
        buffer: String,

        // NOTE(review): presumably bypasses an interactive confirmation —
        // confirm against the handler.
        /// Assume "yes" instead of asking for confirmation
        #[arg(short = 'y', long)]
        yes: bool,
    },

    // NOTE(review): whether offsets are bytes or characters, and what an
    // omitted `--end` means, is decided by the handler — confirm there.
    /// Peek at a range of a buffer's content
    Peek {
        /// Buffer to read from
        buffer: String,

        /// Start offset of the range
        #[arg(long, default_value = "0")]
        start: usize,

        /// End offset of the range
        #[arg(long)]
        end: Option<usize>,
    },

    /// Search a buffer for a pattern
    Grep {
        /// Buffer to search
        buffer: String,

        /// Pattern to search for
        pattern: String,

        /// Maximum number of matches to report
        #[arg(short = 'n', long, default_value = "20")]
        max_matches: usize,

        // NOTE(review): assumed to be the amount of context shown around each
        // match — confirm against the handler.
        /// Size of the context window around each match
        #[arg(short, long, default_value = "120")]
        window: usize,

        /// Match case-insensitively
        #[arg(short, long)]
        ignore_case: bool,
    },

    /// Compute chunk indices for a buffer
    ChunkIndices {
        /// Buffer to chunk
        buffer: String,

        /// Chunk size
        #[arg(long, default_value_t = DEFAULT_CHUNK_SIZE)]
        chunk_size: usize,

        /// Overlap between consecutive chunks
        #[arg(long, default_value_t = DEFAULT_OVERLAP)]
        overlap: usize,
    },

    /// Write a buffer's chunks to files
    WriteChunks {
        /// Buffer to chunk
        buffer: String,

        /// Directory the chunk files are written to
        #[arg(short, long, default_value = ".rlm/chunks")]
        out_dir: PathBuf,

        /// Chunk size
        #[arg(long, default_value_t = DEFAULT_CHUNK_SIZE)]
        chunk_size: usize,

        /// Overlap between consecutive chunks
        #[arg(long, default_value_t = DEFAULT_OVERLAP)]
        overlap: usize,

        /// Prefix for chunk file names
        #[arg(long, default_value = "chunk")]
        prefix: String,
    },

    /// Add a new buffer
    AddBuffer {
        /// Name of the new buffer
        name: String,

        // NOTE(review): presumably read from stdin when omitted, as the
        // `update` examples show for that command — confirm.
        /// Buffer content
        content: Option<String>,
    },

    /// Update the content of an existing buffer
    #[command(after_help = r#"Examples:
cat updated.txt | rlm-cli update main-source # Update from stdin
rlm-cli update my-buffer "new content" # Update with inline content
rlm-cli update my-buffer --embed # Update and generate embeddings
rlm-cli update my-buffer --chunk-size 500 # Custom chunk size"#)]
    #[command(alias = "update")]
    UpdateBuffer {
        /// Buffer to update
        buffer: String,

        /// New content; read from stdin when omitted
        content: Option<String>,

        /// Generate embeddings after updating
        #[arg(short, long)]
        embed: bool,

        // NOTE(review): shares the "semantic" default with `load --chunker`,
        // so presumably accepts the same values — confirm.
        /// Chunking strategy
        #[arg(long, default_value = "semantic")]
        strategy: String,

        /// Chunk size
        #[arg(long, default_value_t = DEFAULT_CHUNK_SIZE)]
        chunk_size: usize,

        /// Overlap between consecutive chunks
        #[arg(long, default_value_t = DEFAULT_OVERLAP)]
        overlap: usize,
    },

    /// Export buffers
    ExportBuffers {
        // NOTE(review): destination used when this is omitted (likely stdout)
        // is decided by the handler — confirm.
        /// Output file
        #[arg(short, long)]
        output: Option<PathBuf>,

        /// Pretty-print the output
        #[arg(short, long)]
        pretty: bool,
    },

    // NOTE(review): presumably prints the current value when neither a value
    // nor `--delete` is given — confirm against the handler.
    /// Get, set, or delete a variable
    #[command(name = "var")]
    Variable {
        /// Variable name
        name: String,

        /// Value to assign
        value: Option<String>,

        /// Delete the variable
        #[arg(short, long)]
        delete: bool,
    },

    // NOTE(review): how a "global" differs from a `var` is not visible in this
    // file; the argument shape is identical.
    /// Get, set, or delete a global
    Global {
        /// Global name
        name: String,

        /// Value to assign
        value: Option<String>,

        /// Delete the global
        #[arg(short, long)]
        delete: bool,
    },

    /// Search chunks (hybrid search by default)
    #[command(after_help = r#"Examples:
rlm-cli search "error handling" # Hybrid search (default)
rlm-cli search "authentication" -k 5 # Top 5 results
rlm-cli search "config" --mode bm25 # BM25 keyword search only
rlm-cli search "API" --mode semantic # Semantic search only
rlm-cli search "bug fix" --buffer main-source # Filter by buffer
rlm-cli search "auth" --preview # Include content preview
rlm-cli --format json search "test" | jq '.results[].chunk_id'
"#)]
    Search {
        /// Search query
        query: String,

        /// Number of top results to return
        #[arg(short = 'k', long, default_value = "10")]
        top_k: usize,

        // NOTE(review): which score this is compared against, and in which
        // modes it applies, is decided by the search code — confirm.
        /// Score threshold
        #[arg(short, long, default_value = "0.3")]
        threshold: f32,

        /// Search mode: `hybrid`, `bm25`, or `semantic`
        #[arg(short, long, default_value = "hybrid")]
        mode: String,

        // NOTE(review): name suggests the `k` constant of reciprocal rank
        // fusion used to merge rankings in hybrid mode — confirm.
        /// RRF `k` parameter
        #[arg(long, default_value = "60")]
        rrf_k: u32,

        /// Restrict the search to this buffer
        #[arg(short, long)]
        buffer: Option<String>,

        /// Include a content preview in the results
        #[arg(short, long)]
        preview: bool,

        /// Length of the content preview
        #[arg(long, default_value = "150")]
        preview_len: usize,
    },

    /// Aggregate analyst findings given as a JSON array
    #[command(after_help = r#"Examples:
cat findings.json | rlm-cli aggregate # Aggregate from stdin
rlm-cli aggregate --buffer findings # Read from buffer
rlm-cli aggregate --min-relevance medium # Filter low relevance
rlm-cli --format json aggregate | jq '.findings'
Input format (JSON array of analyst findings):
[
{"chunk_id": 12, "relevance": "high", "findings": ["..."], "summary": "..."},
{"chunk_id": 27, "relevance": "medium", "findings": ["..."], "summary": "..."}
]"#)]
    Aggregate {
        /// Read the findings from this buffer instead of stdin
        #[arg(short, long)]
        buffer: Option<String>,

        /// Minimum relevance to keep (e.g. `medium` filters out `low`)
        #[arg(long, default_value = "low")]
        min_relevance: String,

        /// Field to group the findings by
        #[arg(long, default_value = "relevance")]
        group_by: String,

        /// Field to sort the findings by
        #[arg(long, default_value = "relevance")]
        sort_by: String,

        // NOTE(review): presumably the aggregated result is stored under this
        // buffer name — confirm against the handler.
        /// Buffer to store the aggregated output in
        #[arg(short, long)]
        output_buffer: Option<String>,
    },

    /// Dispatch a buffer's chunks in batches
    #[command(after_help = r#"Examples:
rlm-cli dispatch my-buffer # Dispatch all chunks
rlm-cli dispatch my-buffer --batch-size 5 # 5 chunks per batch
rlm-cli dispatch my-buffer --workers 4 # Split into 4 batches
rlm-cli dispatch my-buffer --query "error" # Only relevant chunks
rlm-cli --format json dispatch my-buffer # JSON for orchestrator"#)]
    Dispatch {
        /// Buffer whose chunks are dispatched
        buffer: String,

        /// Number of chunks per batch
        #[arg(long, default_value = "10")]
        batch_size: usize,

        // NOTE(review): how this interacts with `--batch-size` when both are
        // given is decided by the handler — confirm.
        /// Split the chunks into this many batches
        #[arg(long)]
        workers: Option<usize>,

        /// Only dispatch chunks relevant to this query
        #[arg(short, long)]
        query: Option<String>,

        // NOTE(review): `mode` and `threshold` mirror the `search` options and
        // are presumably only consulted when `--query` is set — confirm.
        /// Search mode used for `--query` filtering
        #[arg(long, default_value = "hybrid")]
        mode: String,

        /// Score threshold used for `--query` filtering
        #[arg(long, default_value = "0.3")]
        threshold: f32,
    },

    /// Chunk operations
    #[command(subcommand)]
    Chunk(ChunkCommands),
}
/// Chunk operations
#[derive(Subcommand, Debug)]
pub enum ChunkCommands {
    /// Get a chunk's content by ID
    #[command(after_help = r#"Examples:
rlm-cli chunk get 42 # Get chunk content
rlm-cli chunk get 42 --metadata # Include byte range, token count
rlm-cli --format json chunk get 42 # JSON output for programmatic use
"#)]
    Get {
        /// Chunk ID
        id: i64,

        /// Include metadata (byte range, token count)
        #[arg(short, long)]
        metadata: bool,
    },

    /// List the chunks of a buffer
    #[command(after_help = r#"Examples:
rlm-cli chunk list main-source # List chunk IDs
rlm-cli chunk list 1 --preview # Show content preview
rlm-cli --format json chunk list 1 | jq '.[].id'
"#)]
    List {
        /// Buffer name or ID
        buffer: String,

        /// Show a content preview for each chunk
        #[arg(short, long)]
        preview: bool,

        /// Length of the content preview
        #[arg(long, default_value = "100")]
        preview_len: usize,
    },

    /// Generate embeddings for a buffer's chunks
    #[command(after_help = r#"Examples:
rlm-cli chunk embed main-source # Generate embeddings
rlm-cli chunk embed 1 --force # Re-embed existing chunks
"#)]
    Embed {
        /// Buffer name or ID
        buffer: String,

        /// Re-embed chunks that already exist
        #[arg(short, long)]
        force: bool,
    },

    // NOTE(review): what the status report contains is decided by the handler,
    // which is not visible in this file.
    /// Show chunk status
    Status,
}
impl Cli {
    /// Resolves the database path to use.
    ///
    /// Returns a copy of the path stored in `db_path` when one is present;
    /// otherwise builds a path from `crate::storage::DEFAULT_DB_PATH`.
    #[must_use]
    pub fn get_db_path(&self) -> PathBuf {
        match &self.db_path {
            // The caller gets an owned path, so the stored one is cloned.
            Some(explicit) => explicit.clone(),
            None => PathBuf::from(crate::storage::DEFAULT_DB_PATH),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use clap::CommandFactory;

    /// Builds a `Cli` fixture with the given database path; the other fields
    /// are fixed (`verbose` off, `"text"` format, `status` command).
    fn cli_with_db_path(db_path: Option<PathBuf>) -> Cli {
        Cli {
            db_path,
            verbose: false,
            format: String::from("text"),
            command: Commands::Status,
        }
    }

    /// Runs clap's built-in assertions over the derived command definition.
    #[test]
    fn test_cli_parse() {
        Cli::command().debug_assert();
    }

    /// Without an explicit path, the storage module's default is returned.
    #[test]
    fn test_default_db_path() {
        let resolved = cli_with_db_path(None).get_db_path();
        assert_eq!(resolved, PathBuf::from(crate::storage::DEFAULT_DB_PATH));
    }

    /// An explicit path is returned unchanged.
    #[test]
    fn test_custom_db_path() {
        let resolved = cli_with_db_path(Some(PathBuf::from("/custom/path.db"))).get_db_path();
        assert_eq!(resolved, PathBuf::from("/custom/path.db"));
    }
}