use std::path::PathBuf;
use clap::{Parser, Subcommand};
use crate::color::ColorChoice;
pub const MAIN_HELP: &str = "\
EXAMPLES:
# Search for code semantically (auto-indexes if needed)
colgrep \"function that handles user authentication\"
colgrep \"error handling for database connections\"
# Search in a specific directory
colgrep \"API endpoints\" ./backend
# Search in a specific file (or multiple files)
colgrep \"error handling\" ./src/main.rs
colgrep \"auth\" ./src/auth.rs ./src/login.rs
# Search in multiple directories (results are merged by score)
colgrep \"error handling\" ./src ./lib ./api
colgrep -e \"Result\" \"error\" ./crate-a ./crate-b -k 20
# Filter by file type (grep-like)
colgrep \"parse config\" --include \"*.rs\"
colgrep \"test helpers\" --include \"*_test.go\"
# Hybrid search: grep first, then rank semantically
colgrep \"usage\" -e \"async fn\"
colgrep \"error\" -e \"panic!\" --include \"*.rs\"
# List only matching files (like grep -l)
colgrep -l \"database queries\"
# Show full function/class content
colgrep -c \"parse config\"
colgrep --content \"authentication handler\" -k 5
# Output as JSON for scripting
colgrep --json \"authentication\" | jq '.[] | .unit.file'
# Build or update index (without searching)
colgrep init
colgrep init ~/projects/myapp
# Check index status
colgrep status
# Clear index
colgrep clear
colgrep clear --all
# Change default model (clears existing indexes)
colgrep set-model lightonai/LateOn-Code-edge
HF_TOKEN=hf_xxx colgrep set-model myorg/private-model
# Update to the latest version
colgrep update
SUPPORTED LANGUAGES:
Python, Rust, TypeScript, JavaScript, Go, Java, C, C++, C#, Ruby,
PHP, Swift, Kotlin, Scala, Lua, Elixir, Haskell, OCaml, QML, R, Zig,
Julia, Shell/Bash, SQL, Markdown, Plain text
ENVIRONMENT:
Indexes are stored in ~/.local/share/colgrep/ (or $XDG_DATA_HOME/colgrep)
Config is stored in ~/.config/colgrep/ (or $XDG_CONFIG_HOME/colgrep)";
pub const SEARCH_HELP: &str = "\
EXAMPLES:
# Basic semantic search
colgrep search \"function that handles authentication\"
colgrep search \"error handling\" ./backend
# Filter by file type
colgrep search \"parse config\" --include \"*.rs\"
colgrep search \"API handler\" --include \"*.go\"
# Hybrid search (grep + semantic ranking)
colgrep search \"usage\" -e \"async fn\"
colgrep search \"error\" -e \"Result<\" --include \"*.rs\"
# List only matching files
colgrep search -l \"database operations\"
# Show full function/class content
colgrep search -c \"parse config\"
colgrep search --content \"handler\" -k 5
# More results
colgrep search -k 20 \"logging utilities\"
# JSON output for scripting
colgrep search --json \"auth\" | jq '.[0].unit.name'
GREP COMPATIBILITY:
-r, --recursive Enabled by default (for grep users)
-l, --files-only Show only filenames, like grep -l
-c, --content Show syntax-highlighted content (up to 50 lines)
-e, --pattern Pre-filter with regex pattern (ERE syntax by default)
-E, --extended-regexp Kept for compatibility (regex is now default)
-F, --fixed-strings Interpret -e pattern as literal string (no regex)
-w, --word-regexp Match whole words only for -e pattern
--include Filter files by glob pattern
--exclude Exclude files matching pattern
--exclude-dir Exclude directories (supports glob patterns: vendor, */plugins, **/test_*)";
pub const STATUS_HELP: &str = "\
EXAMPLES:
colgrep status
colgrep status ~/projects/myapp";
pub const CLEAR_HELP: &str = "\
EXAMPLES:
# Clear index for current directory
colgrep clear
# Clear index for specific project
colgrep clear ~/projects/myapp
# Clear ALL indexes
colgrep clear -a
colgrep clear --all";
pub const SET_MODEL_HELP: &str = "\
EXAMPLES:
# Set default model (public HuggingFace model)
colgrep set-model lightonai/LateOn-Code-edge
# Use a different public model
colgrep set-model colbert-ir/colbertv2.0
# Use a private HuggingFace model (requires HF_TOKEN)
HF_TOKEN=hf_xxx colgrep set-model myorg/private-model
# Use a local model directory
colgrep set-model /path/to/local/model
AUTHENTICATION:
For private HuggingFace models, set your token via environment variable:
• HF_TOKEN=hf_xxx colgrep set-model org/private-model
• export HF_TOKEN=hf_xxx && colgrep set-model org/private-model
Token priority: HF_TOKEN > HUGGING_FACE_HUB_TOKEN > ~/.cache/huggingface/token
NOTES:
• Changing models clears all existing indexes (embedding dimensions differ)
• The model is downloaded from HuggingFace and cached automatically
• Model preference is stored in ~/.config/colgrep/config.json
• Use 'colgrep settings' to view the current model";
pub const INIT_HELP: &str = "\
EXAMPLES:
# Build or update index for the current directory
colgrep init
# Build or update index for a specific project
colgrep init ~/projects/myapp
# Force auto-confirm for large codebases
colgrep init -y
# Use a specific model
colgrep init --model lightonai/LateOn-Code-edge
# Override model/session batch size for this run
colgrep init --batch-size 2
# Override outer encoding batch size for benchmarking/tuning
colgrep init --encode-batch-size 1024
# Override outer index chunk size for benchmarking/tuning
colgrep init --index-chunk-size 4096
# Compare batch sort orders during encoding
colgrep init --batch-sort-order big-first
colgrep init --batch-sort-order small-first
NOTES:
• Creates a new index if none exists
• Incrementally updates the index if files changed
• Useful for pre-warming the index before searching
• Subsequent searches will be fast since the index is already built";
pub const CONFIG_HELP: &str = "\
EXAMPLES:
# Show current configuration
colgrep settings
# Set default number of results
colgrep settings --k 20
# Enable verbose output by default (full content with syntax highlighting)
colgrep settings --verbose
# Disable verbose output (compact filepath:lines format, this is the default)
colgrep settings --no-verbose
# Switch to INT8 quantized model (faster inference)
colgrep settings --int8
# Switch back to full-precision (FP32) model (default)
colgrep settings --fp32
# Set embedding pool factor (smaller index, faster search)
colgrep settings --pool-factor 2
# Disable embedding pooling (larger index, more precise)
colgrep settings --pool-factor 1
# Set parallel encoding sessions (default: auto-detected CPU count)
colgrep settings --parallel 8
# Set batch size per session (default: 1)
colgrep settings --batch-size 2
# Set parser recursion guard depth (default: 1024)
colgrep settings --max-recursion-depth 1024
# Set both at once
colgrep settings --k 25 --n 8
# Reset to defaults (unset)
colgrep settings --k 0 --n 0
# Add extra ignore patterns (on top of built-in defaults)
colgrep settings --ignore generated --ignore \"*.pb.go\"
# Remove an extra ignore pattern
colgrep settings --no-ignore generated
# Clear all extra ignore patterns (revert to defaults only)
colgrep settings --clear-ignore
# Force-include files/dirs that are normally ignored
colgrep settings --force-include .vscode --force-include vendor/internal
# Remove a force-include pattern
colgrep settings --no-force-include .vscode
# Clear all force-include patterns
colgrep settings --clear-force-include
# Show absolute paths in search output
colgrep settings --no-relative-paths
# Show relative paths in search output (default, saves tokens for LLM usage)
colgrep settings --relative-paths
NOTES:
• Values are stored in ~/.config/colgrep/config.json
• Use 0 to reset a value to its default
• These values override the CLI defaults when not explicitly specified
• Default output is compact (filepath:lines). Use -v or --verbose for full content
• FP32 (full-precision) is the default
• Pool factor 2 (default) reduces index size by ~50%. Use 1 to disable pooling
• Parallel sessions default to CPU count. Batch-size 1 (default) maximizes throughput
• Parser recursion depth defaults to 1024. Increase only if needed for deep ASTs
• Extra ignore patterns add to the built-in defaults (node_modules, .git, target, etc.)
• Force-include patterns override both built-in and extra ignore rules
• Patterns match directory/file names (e.g., \".vscode\") or path prefixes (e.g., \"vendor/internal\")
• Suffix patterns with * are supported (e.g., \"*.pb.go\")
• Use --relative-paths to display paths relative to the current directory (saves ~35% tokens for LLM usage)";
#[derive(Parser)]
#[command(
name = "colgrep",
version,
about = "Semantic grep - find code by meaning, not just text",
long_about = "Semantic grep - find code by meaning, not just text.\n\n\
colgrep is grep that understands what you're looking for. Search with\n\
natural language like \"error handling logic\" and find relevant code\n\
even when keywords don't match exactly.\n\n\
• Hybrid search: grep + semantic ranking with -e flag\n\
• Natural language queries\n\
• 18+ languages supported\n\
• Incremental indexing",
after_help = MAIN_HELP,
args_conflicts_with_subcommands = true
)]
pub struct Cli {
#[command(subcommand)]
pub command: Option<Commands>,
#[arg(value_name = "QUERY")]
pub query: Option<String>,
#[arg(value_name = "PATH")]
pub paths: Vec<PathBuf>,
#[arg(short = 'k', long = "results")]
pub top_k: Option<usize>,
#[arg(long)]
pub model: Option<String>,
#[arg(long)]
pub json: bool,
#[arg(short = 'r', long)]
pub recursive: bool,
#[arg(long = "include", value_name = "PATTERN")]
pub include_patterns: Vec<String>,
#[arg(short = 'l', long = "files-only")]
pub files_only: bool,
#[arg(short = 'c', long = "content")]
pub show_content: bool,
#[arg(short = 'n', long = "lines")]
pub context_lines: Option<usize>,
#[arg(short = 'e', long = "pattern", value_name = "PATTERN")]
pub text_pattern: Option<String>,
#[arg(short = 'E', long = "extended-regexp")]
pub extended_regexp: bool,
#[arg(short = 'F', long = "fixed-strings")]
pub fixed_strings: bool,
#[arg(short = 'w', long = "word-regexp")]
pub word_regexp: bool,
#[arg(short = 's', long = "case-sensitive")]
pub case_sensitive: bool,
#[arg(long = "exclude", value_name = "PATTERN")]
pub exclude_patterns: Vec<String>,
#[arg(long = "exclude-dir", value_name = "DIR")]
pub exclude_dirs: Vec<String>,
#[arg(long)]
pub stats: bool,
#[arg(long)]
pub reset_stats: bool,
#[arg(long)]
pub code_only: bool,
#[arg(long = "semantic-only")]
pub no_fts: bool,
#[arg(long, value_name = "FLOAT")]
pub alpha: Option<f32>,
#[arg(long = "install-claude-code")]
pub install_claude_code: bool,
#[arg(long = "uninstall-claude-code")]
pub uninstall_claude_code: bool,
#[arg(long = "install-opencode")]
pub install_opencode: bool,
#[arg(long = "uninstall-opencode")]
pub uninstall_opencode: bool,
#[arg(long = "install-codex")]
pub install_codex: bool,
#[arg(long = "uninstall-codex")]
pub uninstall_codex: bool,
#[arg(long = "install-hermes")]
pub install_hermes: bool,
#[arg(long = "uninstall-hermes")]
pub uninstall_hermes: bool,
#[arg(long = "uninstall")]
pub uninstall: bool,
#[arg(long = "session-hook", hide = true)]
pub session_hook: bool,
#[arg(long = "task-hook", hide = true)]
pub task_hook: bool,
#[arg(long = "no-pool")]
pub no_pool: bool,
#[arg(long = "pool-factor", value_name = "FACTOR")]
pub pool_factor: Option<usize>,
#[arg(short = 'y', long = "yes")]
pub auto_confirm: bool,
#[arg(
long = "color",
value_name = "WHEN",
default_value = "auto",
global = true
)]
pub color: ColorChoice,
#[arg(long = "force-cpu", global = true, conflicts_with = "force_gpu")]
pub force_cpu: bool,
#[arg(long = "force-gpu", global = true, conflicts_with = "force_cpu")]
pub force_gpu: bool,
}
#[derive(Subcommand)]
pub enum Commands {
#[command(after_help = SEARCH_HELP)]
Search {
query: Option<String>,
#[arg(value_name = "PATH")]
paths: Vec<PathBuf>,
#[arg(short = 'k', long = "results")]
top_k: Option<usize>,
#[arg(long)]
model: Option<String>,
#[arg(long)]
json: bool,
#[arg(short = 'r', long)]
recursive: bool,
#[arg(long = "include", value_name = "PATTERN")]
include_patterns: Vec<String>,
#[arg(short = 'l', long = "files-only")]
files_only: bool,
#[arg(short = 'c', long = "content")]
show_content: bool,
#[arg(short = 'n', long = "lines")]
context_lines: Option<usize>,
#[arg(short = 'e', long = "pattern", value_name = "PATTERN")]
text_pattern: Option<String>,
#[arg(short = 'E', long = "extended-regexp")]
extended_regexp: bool,
#[arg(short = 'F', long = "fixed-strings")]
fixed_strings: bool,
#[arg(short = 'w', long = "word-regexp")]
word_regexp: bool,
#[arg(short = 's', long = "case-sensitive")]
case_sensitive: bool,
#[arg(long = "exclude", value_name = "PATTERN")]
exclude_patterns: Vec<String>,
#[arg(long = "exclude-dir", value_name = "DIR")]
exclude_dirs: Vec<String>,
#[arg(long)]
code_only: bool,
#[arg(long = "semantic-only")]
no_fts: bool,
#[arg(long, value_name = "FLOAT")]
alpha: Option<f32>,
#[arg(long = "no-pool")]
no_pool: bool,
#[arg(long = "pool-factor", value_name = "FACTOR")]
pool_factor: Option<usize>,
#[arg(short = 'y', long = "yes")]
auto_confirm: bool,
#[arg(long = "static-batch")]
static_batch: bool,
},
#[command(after_help = STATUS_HELP)]
Status {
#[arg(default_value = ".")]
path: PathBuf,
},
#[command(after_help = CLEAR_HELP)]
Clear {
#[arg(default_value = ".")]
path: PathBuf,
#[arg(short = 'a', long)]
all: bool,
},
#[command(after_help = SET_MODEL_HELP)]
SetModel {
model: String,
},
Update,
#[command(after_help = INIT_HELP)]
Init {
#[arg(default_value = ".")]
path: PathBuf,
#[arg(long)]
model: Option<String>,
#[arg(long = "no-pool")]
no_pool: bool,
#[arg(long = "pool-factor", value_name = "FACTOR")]
pool_factor: Option<usize>,
#[arg(short = 'y', long = "yes")]
auto_confirm: bool,
#[arg(long = "batch-size", value_name = "SIZE")]
batch_size: Option<usize>,
#[arg(long = "encode-batch-size", value_name = "SIZE")]
encode_batch_size: Option<usize>,
#[arg(long = "index-chunk-size", value_name = "SIZE")]
index_chunk_size: Option<usize>,
#[arg(long = "static-batch")]
static_batch: bool,
},
#[command(name = "settings", after_help = CONFIG_HELP)]
Settings {
#[arg(long = "k")]
default_k: Option<usize>,
#[arg(long = "n")]
default_n: Option<usize>,
#[arg(long, conflicts_with = "int8")]
fp32: bool,
#[arg(long, conflicts_with = "fp32")]
int8: bool,
#[arg(long = "pool-factor", value_name = "FACTOR")]
pool_factor: Option<usize>,
#[arg(long = "parallel", value_name = "SESSIONS")]
parallel_sessions: Option<usize>,
#[arg(long = "batch-size", value_name = "SIZE")]
batch_size: Option<usize>,
#[arg(long = "max-recursion-depth", value_name = "DEPTH")]
max_recursion_depth: Option<usize>,
#[arg(long)]
verbose: bool,
#[arg(long = "no-verbose", conflicts_with = "verbose")]
no_verbose: bool,
#[arg(long = "relative-paths", conflicts_with = "no_relative_paths")]
relative_paths: bool,
#[arg(long = "no-relative-paths", conflicts_with = "relative_paths")]
no_relative_paths: bool,
#[arg(long = "hybrid-search", conflicts_with = "no_hybrid_search")]
hybrid_search: bool,
#[arg(long = "no-hybrid-search", conflicts_with = "hybrid_search")]
no_hybrid_search: bool,
#[arg(long, value_name = "FLOAT")]
alpha: Option<f32>,
#[arg(long = "ignore", value_name = "PATTERN")]
add_ignore: Vec<String>,
#[arg(long = "no-ignore", value_name = "PATTERN")]
remove_ignore: Vec<String>,
#[arg(long = "force-include", value_name = "PATTERN")]
add_force_include: Vec<String>,
#[arg(long = "no-force-include", value_name = "PATTERN")]
remove_force_include: Vec<String>,
#[arg(long = "clear-ignore")]
clear_ignore: bool,
#[arg(long = "clear-force-include")]
clear_force_include: bool,
},
}