manx_cli/
cli.rs

1use clap::{Parser, Subcommand};
2use std::path::PathBuf;
3
4#[derive(Parser)]
5#[command(
6    name = "manx",
7    about = "A blazing-fast CLI documentation finder",
8    long_about = r#"🚀 Intelligent documentation finder with native RAG and AI synthesis
9
10CORE COMMANDS:
11  snippet <lib> [query]          Search code snippets and examples (official + local docs)
12  search <query>                 Search official documentation across the web
13  doc <lib> [topic]              Browse comprehensive documentation  
14  get <id>                       Retrieve specific results by ID
15
16LOCAL RAG COMMANDS:
17  index <path>                   Index your documents for semantic search
18  sources list                   View indexed document sources
19  sources clear                  Clear all indexed documents
20
21EMBEDDING SYSTEM - Smart semantic search (works great out of the box):
22  embedding status               View current embedding configuration
23  embedding list                 Show installed models
24  embedding download <model>     Install neural models from HuggingFace
25  embedding test <query>         Test embedding quality
26
27DEFAULT MODE (No setup required):
28  ⚡ Hash-based embeddings       Built-in algorithm (0ms, offline, 0MB storage)
29  📚 Official documentation      Context7 API integration
30  🔍 Keyword matching           Excellent for exact phrases and terms
31
32ENHANCED MODE (Optional setup for better results):
33  🧠 Neural embeddings          Install: sentence-transformers/all-MiniLM-L6-v2
34  🎯 Semantic understanding     "database connection" = "data storage"
35  📊 Intent matching            Superior relevance ranking
36  🔄 Easy switching             manx embedding set onnx:model-name
37
38AI SYNTHESIS - Get comprehensive answers with citations (optional):
39  manx config --openai-api "sk-your-key"   Enable OpenAI GPT models
40  manx config --anthropic-api "sk-ant-key" Enable Anthropic Claude models
41  manx snippet react hooks                 Search + AI explanation (if configured)
42
43LOCAL RAG - Search your own documents and code (optional):
44  manx index /path/to/docs                 Index your documentation
45  manx config --rag on                     Enable local document search
46  manx search "authentication" --rag       Search indexed documents only
47
48QUICK START:
49  manx snippet react "state management"    Works great with defaults
50  manx embedding download all-MiniLM-L6-v2 Optional: Better semantic search
51  manx config --openai-api "sk-your-key"   Optional: AI synthesis
52
53Use 'manx <command> --help' for detailed options."#,
54    version = get_version_info(),
55    author,
56    arg_required_else_help = true
57)]
58pub struct Cli {
59    #[command(subcommand)]
60    pub command: Option<Commands>,
61
62    /// Show detailed debug information and API requests
63    #[arg(long, help_heading = "DEBUG OPTIONS")]
64    pub debug: bool,
65
66    /// Output JSON format (useful for scripts and automation)
67    #[arg(short = 'q', long, help_heading = "OUTPUT OPTIONS")]
68    pub quiet: bool,
69
70    /// Clear all cached documentation and start fresh
71    #[arg(long, help_heading = "CACHE OPTIONS")]
72    pub clear_cache: bool,
73
74    /// Enable automatic caching of all search results
75    #[arg(long, help_heading = "CACHE OPTIONS")]
76    pub auto_cache_on: bool,
77
78    /// Disable automatic caching (manual caching only)
79    #[arg(long, help_heading = "CACHE OPTIONS")]
80    pub auto_cache_off: bool,
81
82    /// Override API key for this session
83    #[arg(long, help_heading = "GLOBAL OPTIONS")]
84    pub api_key: Option<String>,
85
86    /// Override cache directory for this session
87    #[arg(long, help_heading = "GLOBAL OPTIONS")]
88    pub cache_dir: Option<PathBuf>,
89
90    /// Work offline using only cached results
91    #[arg(long, help_heading = "GLOBAL OPTIONS")]
92    pub offline: bool,
93}
94
95#[derive(Subcommand)]
96#[allow(clippy::large_enum_variant)]
97pub enum Commands {
98    /// 📚 Browse comprehensive documentation sections and guides
99    Doc {
100        /// Library name (examples: 'fastapi', 'react@18', 'django')
101        #[arg(value_name = "LIBRARY")]
102        library: String,
103        /// Topic to search for within documentation (optional - omit for general docs)
104        #[arg(value_name = "TOPIC", default_value = "")]
105        query: String,
106        /// Save documentation to file (auto-detects format)
107        #[arg(short = 'o', long, value_name = "FILE")]
108        output: Option<PathBuf>,
109        /// Limit number of sections shown (default: 10, use 0 for unlimited)
110        #[arg(short = 'l', long, value_name = "NUMBER")]
111        limit: Option<usize>,
112        /// Force retrieval-only mode (disable LLM synthesis even if API key configured)
113        #[arg(long)]
114        no_llm: bool,
115        /// Search locally indexed documents instead of Context7 API
116        #[arg(long)]
117        rag: bool,
118    },
119
120    /// 🔍 Search code snippets and examples with AI-powered understanding
121    ///
122    /// ENHANCED SEARCH:
123    ///   • Searches official docs (Context7) + your indexed documents (RAG)
124    ///   • Semantic understanding finds relevant content with different wording
125    ///   • Quote prioritization: "useEffect cleanup" gets 10x higher relevance
126    ///   • Optional AI synthesis provides comprehensive answers with citations
127    ///
128    /// SEMANTIC FEATURES:
129    ///   • "memory leaks" finds: "memory cleanup", "performance issues", "leak prevention"
130    ///   • "authentication" finds: "auth", "login", "security", "credentials"
131    ///   • Version-specific: react@18, django@4.2
132    ///
133    /// AI SYNTHESIS:
134    ///   • Configure: manx config --llm-api "sk-your-key"
135    ///   • Get answers: manx snippet "react hooks best practices"
136    ///   • Force retrieval: manx snippet react hooks --no-llm
137    ///
138    /// EXAMPLES:
139    ///   manx snippet react "useEffect cleanup"           # Semantic search with phrase priority
140    ///   manx snippet "database pooling" --llm-api        # Get AI answer with citations  
141    ///   manx snippet fastapi middleware --no-llm         # Raw results only
142    ///   manx snippet python "async functions" --rag      # Search your indexed code files
143    Snippet {
144        /// Library name (examples: 'fastapi', 'react@18', 'vue@3')
145        #[arg(value_name = "LIBRARY")]
146        library: String,
147        /// Search query for specific code snippets
148        #[arg(value_name = "QUERY")]
149        query: Option<String>,
150        /// Export results to file (format auto-detected by extension: .md, .json)
151        #[arg(short = 'o', long, value_name = "FILE")]
152        output: Option<PathBuf>,
153        /// Work offline using only cached results (no network requests)
154        #[arg(long)]
155        offline: bool,
156        /// Save specific search results by number (e.g., --save 1,3,7)
157        #[arg(long, value_name = "NUMBERS")]
158        save: Option<String>,
159        /// Save all search results to file
160        #[arg(long)]
161        save_all: bool,
162        /// Export in JSON format instead of Markdown (use with --save or --save-all)
163        #[arg(long)]
164        json: bool,
165        /// Limit number of results shown (default: 10, use 0 for unlimited)
166        #[arg(short = 'l', long, value_name = "NUMBER")]
167        limit: Option<usize>,
168        /// Force retrieval-only mode (disable LLM synthesis even if API key configured)
169        #[arg(long)]
170        no_llm: bool,
171        /// Search locally indexed documents instead of Context7 API (requires: manx config --rag-enabled)
172        #[arg(long)]
173        rag: bool,
174    },
175
176    /// 🔍 Search official documentation across the web
177    ///
178    /// INTELLIGENT WEB SEARCH:
179    ///   • Prioritizes official documentation sites (docs.python.org, reactjs.org, etc.)
180    ///   • Uses semantic embeddings for relevance matching  
181    ///   • Falls back to trusted community sources with clear notification
182    ///   • Optional LLM verification ensures result authenticity
183    ///
184    /// OFFICIAL-FIRST STRATEGY:
185    ///   • Always searches official sources first (10x relevance boost)
186    ///   • Expands to community sources only if insufficient official results
187    ///   • Transparent fallback notifications: "⚠️ Expanded to community sources"
188    ///
189    /// EXAMPLES:
190    ///   manx search "hydra configuration commands"      # Auto-detects LLM availability  
191    ///   manx search "react hooks best practices"        # Uses LLM if API key configured
192    ///   manx search "python async await" --no-llm       # Force embeddings-only mode
193    ///   manx search "authentication" --rag              # Search your indexed documents
194    Search {
195        /// Search query for official documentation
196        #[arg(value_name = "QUERY")]
197        query: String,
198        /// Disable LLM verification (use embeddings-only mode even if API key is configured)
199        #[arg(long)]
200        no_llm: bool,
201        /// Export results to file (format auto-detected by extension: .md, .json)
202        #[arg(short = 'o', long, value_name = "FILE")]
203        output: Option<PathBuf>,
204        /// Limit number of results shown (default: 8)
205        #[arg(short = 'l', long, value_name = "NUMBER")]
206        limit: Option<usize>,
207        /// Search locally indexed documents instead of web search (requires: manx config --rag-enabled)
208        #[arg(long)]
209        rag: bool,
210    },
211
212    /// 📥 Get specific item by ID (doc-3, section-5, etc.)
213    Get {
214        /// Item ID from previous search or doc command output
215        #[arg(value_name = "ITEM_ID")]
216        id: String,
217        /// Save retrieved item to file
218        #[arg(short = 'o', long, value_name = "FILE")]
219        output: Option<PathBuf>,
220    },
221
222    /// 🗂️ Manage local documentation cache
223    Cache {
224        #[command(subcommand)]
225        command: CacheCommands,
226    },
227
228    /// ⚙️ Configure Manx settings, API keys, and AI integration
229    Config {
230        /// Display current configuration settings
231        #[arg(long)]
232        show: bool,
233        /// Set Context7 API key (get one at context7.com)
234        #[arg(long, value_name = "KEY")]
235        api_key: Option<String>,
236        /// Set custom cache directory path
237        #[arg(long, value_name = "PATH")]
238        cache_dir: Option<PathBuf>,
239        /// Enable/disable automatic caching (values: on, off)
240        #[arg(long, value_name = "MODE")]
241        auto_cache: Option<String>,
242        /// Set cache expiration time in hours (default: 24)
243        #[arg(long, value_name = "HOURS")]
244        cache_ttl: Option<u64>,
245        /// Set maximum cache size in MB (default: 100)
246        #[arg(long, value_name = "SIZE")]
247        max_cache_size: Option<u64>,
248        /// Set OpenAI API key for GPT models
249        #[arg(long, value_name = "API_KEY")]
250        openai_api: Option<String>,
251        /// Set Anthropic API key for Claude models
252        #[arg(long, value_name = "API_KEY")]
253        anthropic_api: Option<String>,
254        /// Set Groq API key for fast inference
255        #[arg(long, value_name = "API_KEY")]
256        groq_api: Option<String>,
257        /// Set OpenRouter API key for multi-model access
258        #[arg(long, value_name = "API_KEY")]
259        openrouter_api: Option<String>,
260        /// Set HuggingFace API key for open-source models
261        #[arg(long, value_name = "API_KEY")]
262        huggingface_api: Option<String>,
263        /// Set custom endpoint URL for self-hosted models
264        #[arg(long, value_name = "URL")]
265        custom_endpoint: Option<String>,
266        /// Set preferred LLM provider (openai, anthropic, groq, openrouter, huggingface, custom, auto)
267        #[arg(long, value_name = "PROVIDER")]
268        llm_provider: Option<String>,
269        /// Set specific model name (overrides provider defaults)
270        #[arg(long, value_name = "MODEL")]
271        llm_model: Option<String>,
272        /// Legacy option - Set LLM API key (deprecated, use provider-specific options)
273        #[arg(long, value_name = "API_KEY")]
274        llm_api: Option<String>,
275        /// Enable/disable local RAG system (values: on, off)
276        #[arg(long, value_name = "MODE")]
277        rag: Option<String>,
278        /// Add custom official documentation domain (format: domain.com)
279        #[arg(long, value_name = "DOMAIN")]
280        add_official_domain: Option<String>,
281        /// Set embedding provider for RAG system (hash, onnx:model, ollama:model, openai:model, huggingface:model, custom:url)
282        #[arg(long, value_name = "PROVIDER")]
283        embedding_provider: Option<String>,
284        /// Set embedding API key for API-based providers
285        #[arg(long, value_name = "API_KEY")]
286        embedding_api_key: Option<String>,
287        /// Set embedding model path for local models
288        #[arg(long, value_name = "PATH")]
289        embedding_model_path: Option<PathBuf>,
290        /// Set embedding dimension (default: 384)
291        #[arg(long, value_name = "DIMENSION")]
292        embedding_dimension: Option<usize>,
293    },
294
295    /// 📁 Index local documents or web URLs for RAG search
296    ///
297    /// INDEXING SOURCES:
298    ///   • Local files: manx index ~/docs/api.md
299    ///   • Directories: manx index ~/documentation/  
300    ///   • Web URLs: manx index https://docs.rust-lang.org/book/ch01-01-installation.html
301    ///
302    /// SUPPORTED FORMATS:
303    ///   • Documents: .md, .txt, .rst, .docx, .pdf (with security validation)
304    ///   • Code files: .js, .jsx, .ts, .tsx, .py, .rb, .php, .java, .scala, .kotlin
305    ///                .rs, .go, .c, .cpp, .swift, .dart, .sql, .graphql, and more
306    ///   • Config: .json, .yaml, .yml, .toml, .xml, .ini, .env (with secret masking)
307    ///   • Scripts: .sh, .bash, .zsh, .ps1, .bat (with enhanced security validation)
308    ///   • Web content: HTML pages (auto text extraction)
309    ///
310    /// SECURITY FEATURES:
311    ///   • PDF processing disabled by default (configure to enable)
312    ///   • Code file security validation (obfuscation detection, malicious patterns)
313    ///   • Shell script dangerous command blocking
314    ///   • Automatic secret masking (.env files, API keys, tokens)
315    ///   • Prompt injection detection and prevention
316    ///   • URL validation (HTTP/HTTPS only)
317    ///   • Content sanitization and size limits
318    ///
319    /// EXAMPLES:
320    ///   manx index ~/my-docs/                              # Index directory
321    ///   manx index https://docs.python.org --crawl        # Basic documentation crawl
322    ///   manx index https://fastapi.tiangolo.com --crawl-depth 2  # Limited depth crawl
323    ///   manx index https://react.dev --crawl-all          # Complete site crawl
324    ///   manx index api.pdf --alias "API Reference"        # Index with custom alias
325    Index {
326        /// Path to document/directory or URL to index
327        #[arg(value_name = "PATH_OR_URL")]
328        path: String,
329        /// Optional alias for the indexed source
330        #[arg(long, value_name = "ALIAS")]
331        id: Option<String>,
332        /// Enable basic crawling for URLs (follows links with default depth)
333        #[arg(long)]
334        crawl: bool,
335        /// Crawl with specific depth (implies crawling)
336        #[arg(long, value_name = "DEPTH")]
337        crawl_depth: Option<u32>,
338        /// Crawl entire documentation site (implies crawling)
339        #[arg(long)]
340        crawl_all: bool,
341    },
342
343    /// 📂 Manage indexed document sources
344    Sources {
345        #[command(subcommand)]
346        command: SourceCommands,
347    },
348
349    /// 🔗 Open a specific documentation section by ID
350    Open {
351        /// Section ID from previous doc command output
352        #[arg(value_name = "SECTION_ID")]
353        id: String,
354        /// Save opened section to file
355        #[arg(short = 'o', long, value_name = "FILE")]
356        output: Option<PathBuf>,
357    },
358
359    /// 🔄 Update Manx to the latest version from GitHub
360    Update {
361        /// Check for updates without installing
362        #[arg(long)]
363        check: bool,
364        /// Force update even if already on latest version
365        #[arg(long)]
366        force: bool,
367    },
368
369    /// 🧠 Manage embedding models and providers for semantic search
370    ///
371    /// EMBEDDING PROVIDERS:
372    ///   • hash: Hash-based embeddings (default, fast, lightweight)
373    ///   • onnx:model: Local ONNX models (requires download)
374    ///   • ollama:model: Ollama API (requires Ollama server)
375    ///   • openai:model: OpenAI embeddings API (requires API key)
376    ///   • huggingface:model: HuggingFace embeddings API (requires API key)
377    ///   • custom:url: Custom endpoint API
378    ///
379    /// EXAMPLES:
380    ///   manx embedding status                     # Show current provider and models
381    ///   manx embedding set hash                   # Use hash-based (default)
382    ///   manx embedding set onnx:all-MiniLM-L6-v2 # Use local ONNX model
383    ///   manx embedding set ollama:nomic-embed-text # Use Ollama model
384    ///   manx embedding download all-MiniLM-L6-v2  # Download ONNX model
385    ///   manx embedding test "sample query"        # Test current embedding setup
386    Embedding {
387        #[command(subcommand)]
388        command: EmbeddingCommands,
389    },
390
391    /// 🚀 Interactive setup wizard for configuring manx
392    ///
393    /// WHAT IT DOES:
394    ///   • Guides you through initial configuration
395    ///   • Sets up Context7 API for official docs
396    ///   • Configures embedding models for semantic search
397    ///   • Optional LLM provider setup for AI synthesis
398    ///
399    /// PERFECT FOR:
400    ///   • First-time users
401    ///   • Reconfiguring after updates
402    ///   • Testing different providers
403    ///
404    /// EXAMPLE:
405    ///   manx init     # Start interactive setup wizard
406    Init,
407}
408
409#[derive(Subcommand)]
410pub enum CacheCommands {
411    /// Remove all cached documentation and free up disk space
412    Clear,
413    /// Display cache size, file count, and storage statistics  
414    Stats,
415    /// Show all currently cached libraries and their sizes
416    List,
417}
418
419#[derive(Subcommand)]
420pub enum SourceCommands {
421    /// List all indexed document sources
422    List,
423    /// Add a document source to the index
424    Add {
425        /// Path to document or directory
426        path: PathBuf,
427        /// Optional alias for the source
428        #[arg(long)]
429        id: Option<String>,
430    },
431    /// Clear all indexed documents
432    Clear,
433}
434
435#[derive(Subcommand)]
436pub enum EmbeddingCommands {
437    /// Show current embedding provider status and configuration
438    Status,
439    /// Set embedding provider (hash, onnx:model, ollama:model, openai:model, huggingface:model, custom:url)
440    Set {
441        /// Provider specification
442        #[arg(value_name = "PROVIDER")]
443        provider: String,
444        /// API key for API-based providers
445        #[arg(long, value_name = "API_KEY")]
446        api_key: Option<String>,
447        /// Custom endpoint URL (for custom provider)
448        #[arg(long, value_name = "URL")]
449        endpoint: Option<String>,
450        /// Embedding dimension (default: 384)
451        #[arg(long, value_name = "DIMENSION")]
452        dimension: Option<usize>,
453    },
454    /// Download and install a local ONNX model
455    Download {
456        /// Model name to download (e.g., 'all-MiniLM-L6-v2')
457        #[arg(value_name = "MODEL_NAME")]
458        model: String,
459        /// Force redownload if model already exists
460        #[arg(long)]
461        force: bool,
462    },
463    /// List available models for download or installed models
464    List {
465        /// List available models for download instead of installed models
466        #[arg(long)]
467        available: bool,
468    },
469    /// Test current embedding setup with a sample query
470    Test {
471        /// Query text to test embeddings with
472        #[arg(value_name = "QUERY")]
473        query: String,
474        /// Show detailed embedding vector information
475        #[arg(long)]
476        verbose: bool,
477    },
478    /// Remove downloaded local models
479    Remove {
480        /// Model name to remove
481        #[arg(value_name = "MODEL_NAME")]
482        model: String,
483    },
484}
485
486impl Cli {
487    pub fn parse_args() -> Self {
488        Cli::parse()
489    }
490}
491
492fn get_version_info() -> &'static str {
493    concat!(
494        "\n",
495        "__| |__________________________________________________________________________| |__\n",
496        "__   __________________________________________________________________________   __\n",
497        "  | |                                                                          | |  \n",
498        "  | |       ███        ██████   ██████   █████████   ██████   █████ █████ █████| |  \n",
499        "  | |      ░░░██      ░░██████ ██████   ███░░░░░███ ░░██████ ░░███ ░░███ ░░███ | |  \n",
500        "  | | ██     ░░██      ░███░█████░███  ░███    ░███  ░███░███ ░███  ░░███ ███  | |  \n",
501        "  | |░░       ░░███    ░███░░███ ░███  ░███████████  ░███░░███░███   ░░█████   | |  \n",
502        "  | |          ██░     ░███ ░░░  ░███  ░███░░░░░███  ░███ ░░██████    ███░███  | |  \n",
503        "  | |         ██       ░███      ░███  ░███    ░███  ░███  ░░█████   ███ ░░███ | |  \n",
504        "  | | ██    ███        █████     █████ █████   █████ █████  ░░█████ █████ █████| |  \n",
505        "  | |░░    ░░░        ░░░░░     ░░░░░ ░░░░░   ░░░░░ ░░░░░    ░░░░░ ░░░░░ ░░░░░ | |  \n",
506        "__| |__________________________________________________________________________| |__\n",
507        "__   __________________________________________________________________________   __\n",
508        "  | |                                                                          | |  \n",
509        "\n",
510        "  v",
511        env!("CARGO_PKG_VERSION"),
512        " • blazing-fast docs finder\n"
513    )
514}