manx_cli/
cli.rs

1use clap::{Parser, Subcommand};
2use std::path::PathBuf;
3
4#[derive(Parser)]
5#[command(
6    name = "manx",
7    about = "A blazing-fast CLI documentation finder",
8    long_about = r#"🚀 Intelligent documentation finder with native RAG and AI synthesis
9
10CORE COMMANDS:
11  snippet <lib> [query]          Search code snippets and examples (official + local docs)
12  search <query>                 Search official documentation across the web
13  doc <lib> [topic]              Browse comprehensive documentation  
14  get <id>                       Retrieve specific results by ID
15
16LOCAL RAG COMMANDS:
17  index <path>                   Index your documents for semantic search
18  sources list                   View indexed document sources
19  sources clear                  Clear all indexed documents
20
21EMBEDDING SYSTEM - Smart semantic search (works great out of the box):
22  embedding status               View current embedding configuration
23  embedding list                 Show installed models
24  embedding download <model>     Install neural models from HuggingFace
25  embedding test <query>         Test embedding quality
26
27DEFAULT MODE (No setup required):
28  ⚡ Hash-based embeddings       Built-in algorithm (0ms, offline, 0MB storage)
29  📚 Official documentation      Context7 API integration
30  🔍 Keyword matching           Excellent for exact phrases and terms
31
32ENHANCED MODE (Optional setup for better results):
33  🧠 Neural embeddings          Install: sentence-transformers/all-MiniLM-L6-v2
34  🎯 Semantic understanding     "database connection" = "data storage"
35  📊 Intent matching            Superior relevance ranking
36  🔄 Easy switching             manx config --embedding-provider onnx:model-name
37
38AI SYNTHESIS - Get comprehensive answers with citations (optional):
39  manx config --llm-api "sk-key"           Enable AI answer synthesis
40  manx snippet react hooks                 Search + AI explanation (if configured)
41
42LOCAL RAG - Search your own documents and code (optional):
43  manx index /path/to/docs                 Index your documentation
44  manx config --rag-enabled                Enable local document search
45  manx search "authentication" --rag       Search indexed documents only
46
47QUICK START:
48  manx snippet react "state management"    Works great with defaults
49  manx embedding download all-MiniLM-L6-v2 Optional: Better semantic search
50  manx config --llm-api "sk-openai-key"    Optional: AI synthesis
51
52Use 'manx <command> --help' for detailed options."#,
53    version = get_version_info(),
54    author,
55    arg_required_else_help = true
56)]
57pub struct Cli {
58    #[command(subcommand)]
59    pub command: Option<Commands>,
60
61    /// Show detailed debug information and API requests
62    #[arg(long, help_heading = "DEBUG OPTIONS")]
63    pub debug: bool,
64
65    /// Output JSON format (useful for scripts and automation)
66    #[arg(short = 'q', long, help_heading = "OUTPUT OPTIONS")]
67    pub quiet: bool,
68
69    /// Clear all cached documentation and start fresh
70    #[arg(long, help_heading = "CACHE OPTIONS")]
71    pub clear_cache: bool,
72
73    /// Enable automatic caching of all search results
74    #[arg(long, help_heading = "CACHE OPTIONS")]
75    pub auto_cache_on: bool,
76
77    /// Disable automatic caching (manual caching only)
78    #[arg(long, help_heading = "CACHE OPTIONS")]
79    pub auto_cache_off: bool,
80
81    /// Override API key for this session
82    #[arg(long, help_heading = "GLOBAL OPTIONS")]
83    pub api_key: Option<String>,
84
85    /// Override cache directory for this session
86    #[arg(long, help_heading = "GLOBAL OPTIONS")]
87    pub cache_dir: Option<PathBuf>,
88
89    /// Work offline using only cached results
90    #[arg(long, help_heading = "GLOBAL OPTIONS")]
91    pub offline: bool,
92}
93
// Top-level subcommands. The `///` doc comments double as the clap help
// text shown to users at runtime, so they are intentionally left untouched.
#[derive(Subcommand)]
// Variants differ widely in size (e.g. `Config` carries ~20 option fields);
// a CLI constructs exactly one value, so boxing large variants would add
// noise for no practical gain.
#[allow(clippy::large_enum_variant)]
pub enum Commands {
    /// 📚 Browse comprehensive documentation sections and guides
    Doc {
        /// Library name (examples: 'fastapi', 'react@18', 'django')
        #[arg(value_name = "LIBRARY")]
        library: String,
        /// Topic to search for within documentation (optional - omit for general docs)
        // Empty-string default stands in for "no topic"; downstream code
        // presumably treats "" as the general-docs case — confirm in handler.
        #[arg(value_name = "TOPIC", default_value = "")]
        query: String,
        /// Save documentation to file (auto-detects format)
        #[arg(short = 'o', long, value_name = "FILE")]
        output: Option<PathBuf>,
        /// Limit number of sections shown (default: 10, use 0 for unlimited)
        #[arg(short = 'l', long, value_name = "NUMBER")]
        limit: Option<usize>,
        /// Force retrieval-only mode (disable LLM synthesis even if API key configured)
        #[arg(long)]
        no_llm: bool,
        /// Search locally indexed documents instead of Context7 API
        #[arg(long)]
        rag: bool,
    },

    /// 🔍 Search code snippets and examples with AI-powered understanding
    ///
    /// ENHANCED SEARCH:
    ///   • Searches official docs (Context7) + your indexed documents (RAG)
    ///   • Semantic understanding finds relevant content with different wording
    ///   • Quote prioritization: "useEffect cleanup" gets 10x higher relevance
    ///   • Optional AI synthesis provides comprehensive answers with citations
    ///
    /// SEMANTIC FEATURES:
    ///   • "memory leaks" finds: "memory cleanup", "performance issues", "leak prevention"
    ///   • "authentication" finds: "auth", "login", "security", "credentials"
    ///   • Version-specific: react@18, django@4.2
    ///
    /// AI SYNTHESIS:
    ///   • Configure: manx config --llm-api "sk-your-key"
    ///   • Get answers: manx snippet "react hooks best practices"
    ///   • Force retrieval: manx snippet react hooks --no-llm
    ///
    /// EXAMPLES:
    ///   manx snippet react "useEffect cleanup"           # Semantic search with phrase priority
    ///   manx snippet "database pooling" --llm-api        # Get AI answer with citations  
    ///   manx snippet fastapi middleware --no-llm         # Raw results only
    ///   manx snippet python "async functions" --rag      # Search your indexed code files
    Snippet {
        /// Library name (examples: 'fastapi', 'react@18', 'vue@3')
        #[arg(value_name = "LIBRARY")]
        library: String,
        /// Search query for specific code snippets
        #[arg(value_name = "QUERY")]
        query: Option<String>,
        /// Export results to file (format auto-detected by extension: .md, .json)
        #[arg(short = 'o', long, value_name = "FILE")]
        output: Option<PathBuf>,
        /// Work offline using only cached results (no network requests)
        // Shadows the global `--offline` flag on `Cli`; both exist so the
        // flag can be given either before or after the subcommand.
        #[arg(long)]
        offline: bool,
        /// Save specific search results by number (e.g., --save 1,3,7)
        // Kept as a raw String; the comma-separated list is parsed downstream.
        #[arg(long, value_name = "NUMBERS")]
        save: Option<String>,
        /// Save all search results to file
        #[arg(long)]
        save_all: bool,
        /// Export in JSON format instead of Markdown (use with --save or --save-all)
        #[arg(long)]
        json: bool,
        /// Limit number of results shown (default: 10, use 0 for unlimited)
        #[arg(short = 'l', long, value_name = "NUMBER")]
        limit: Option<usize>,
        /// Force retrieval-only mode (disable LLM synthesis even if API key configured)
        #[arg(long)]
        no_llm: bool,
        /// Search locally indexed documents instead of Context7 API (requires: manx config --rag-enabled)
        #[arg(long)]
        rag: bool,
    },

    /// 🔍 Search official documentation across the web
    ///
    /// INTELLIGENT WEB SEARCH:
    ///   • Prioritizes official documentation sites (docs.python.org, reactjs.org, etc.)
    ///   • Uses semantic embeddings for relevance matching  
    ///   • Falls back to trusted community sources with clear notification
    ///   • Optional LLM verification ensures result authenticity
    ///
    /// OFFICIAL-FIRST STRATEGY:
    ///   • Always searches official sources first (10x relevance boost)
    ///   • Expands to community sources only if insufficient official results
    ///   • Transparent fallback notifications: "⚠️ Expanded to community sources"
    ///
    /// EXAMPLES:
    ///   manx search "hydra configuration commands"      # Auto-detects LLM availability  
    ///   manx search "react hooks best practices"        # Uses LLM if API key configured
    ///   manx search "python async await" --no-llm       # Force embeddings-only mode
    ///   manx search "authentication" --rag              # Search your indexed documents
    Search {
        /// Search query for official documentation
        #[arg(value_name = "QUERY")]
        query: String,
        /// Disable LLM verification (use embeddings-only mode even if API key is configured)
        #[arg(long)]
        no_llm: bool,
        /// Export results to file (format auto-detected by extension: .md, .json)
        #[arg(short = 'o', long, value_name = "FILE")]
        output: Option<PathBuf>,
        /// Limit number of results shown (default: 8)
        #[arg(short = 'l', long, value_name = "NUMBER")]
        limit: Option<usize>,
        /// Search locally indexed documents instead of web search (requires: manx config --rag-enabled)
        #[arg(long)]
        rag: bool,
    },

    /// 📥 Get specific item by ID (doc-3, section-5, etc.)
    Get {
        /// Item ID from previous search or doc command output
        #[arg(value_name = "ITEM_ID")]
        id: String,
        /// Save retrieved item to file
        #[arg(short = 'o', long, value_name = "FILE")]
        output: Option<PathBuf>,
    },

    /// 🗂️ Manage local documentation cache
    Cache {
        #[command(subcommand)]
        command: CacheCommands,
    },

    /// ⚙️ Configure Manx settings, API keys, and AI integration
    // Every field is optional; the handler presumably applies only the
    // options that were actually given, so flags can be combined freely.
    Config {
        /// Display current configuration settings
        #[arg(long)]
        show: bool,
        /// Set Context7 API key (get one at context7.com)
        #[arg(long, value_name = "KEY")]
        api_key: Option<String>,
        /// Set custom cache directory path
        #[arg(long, value_name = "PATH")]
        cache_dir: Option<PathBuf>,
        /// Enable/disable automatic caching (values: on, off)
        // Stringly "on"/"off" value, validated downstream.
        #[arg(long, value_name = "MODE")]
        auto_cache: Option<String>,
        /// Set cache expiration time in hours (default: 24)
        #[arg(long, value_name = "HOURS")]
        cache_ttl: Option<u64>,
        /// Set maximum cache size in MB (default: 100)
        #[arg(long, value_name = "SIZE")]
        max_cache_size: Option<u64>,
        /// Set OpenAI API key for GPT models
        #[arg(long, value_name = "API_KEY")]
        openai_api: Option<String>,
        /// Set Anthropic API key for Claude models
        #[arg(long, value_name = "API_KEY")]
        anthropic_api: Option<String>,
        /// Set Groq API key for fast inference
        #[arg(long, value_name = "API_KEY")]
        groq_api: Option<String>,
        /// Set OpenRouter API key for multi-model access
        #[arg(long, value_name = "API_KEY")]
        openrouter_api: Option<String>,
        /// Set HuggingFace API key for open-source models
        #[arg(long, value_name = "API_KEY")]
        huggingface_api: Option<String>,
        /// Set custom endpoint URL for self-hosted models
        #[arg(long, value_name = "URL")]
        custom_endpoint: Option<String>,
        /// Set preferred LLM provider (openai, anthropic, groq, openrouter, huggingface, custom, auto)
        #[arg(long, value_name = "PROVIDER")]
        llm_provider: Option<String>,
        /// Set specific model name (overrides provider defaults)
        #[arg(long, value_name = "MODEL")]
        llm_model: Option<String>,
        /// Legacy option - Set LLM API key (deprecated, use provider-specific options)
        // Retained for backward compatibility with older setups.
        #[arg(long, value_name = "API_KEY")]
        llm_api: Option<String>,
        /// Enable/disable local RAG system (values: on, off)
        #[arg(long, value_name = "MODE")]
        rag: Option<String>,
        /// Add custom official documentation domain (format: domain.com)
        #[arg(long, value_name = "DOMAIN")]
        add_official_domain: Option<String>,
        /// Set embedding provider for RAG system (hash, onnx:model, ollama:model, openai:model, huggingface:model, custom:url)
        #[arg(long, value_name = "PROVIDER")]
        embedding_provider: Option<String>,
        /// Set embedding API key for API-based providers
        #[arg(long, value_name = "API_KEY")]
        embedding_api_key: Option<String>,
        /// Set embedding model path for local models
        #[arg(long, value_name = "PATH")]
        embedding_model_path: Option<PathBuf>,
        /// Set embedding dimension (default: 384)
        #[arg(long, value_name = "DIMENSION")]
        embedding_dimension: Option<usize>,
    },

    /// 📁 Index local documents or web URLs for RAG search
    ///
    /// INDEXING SOURCES:
    ///   • Local files: manx index ~/docs/api.md
    ///   • Directories: manx index ~/documentation/  
    ///   • Web URLs: manx index https://docs.rust-lang.org/book/ch01-01-installation.html
    ///
    /// SUPPORTED FORMATS:
    ///   • Documents: .md, .txt, .docx, .pdf (with security validation)
    ///   • Web content: HTML pages (auto text extraction)
    ///
    /// SECURITY FEATURES:
    ///   • PDF processing disabled by default (configure to enable)  
    ///   • URL validation (HTTP/HTTPS only)
    ///   • Content sanitization and size limits
    ///
    /// EXAMPLES:
    ///   manx index ~/my-docs/                              # Index directory
    ///   manx index https://docs.python.org --crawl        # Deep crawl documentation site
    ///   manx index https://fastapi.tiangolo.com --crawl --max-depth 2  # Limited depth crawl
    ///   manx index api.pdf --alias "API Reference"        # Index with custom alias
    Index {
        /// Path to document/directory or URL to index
        // A String (not PathBuf) because the value may be a URL.
        #[arg(value_name = "PATH_OR_URL")]
        path: String,
        /// Optional alias for the indexed source
        // NOTE(review): the field is `id` but the example above says
        // `--alias`; as written the flag is `--id` — confirm which is intended.
        #[arg(long, value_name = "ALIAS")]
        id: Option<String>,
        /// Enable deep crawling for URLs (follows links to discover more pages)
        #[arg(long)]
        crawl: bool,
        /// Maximum crawl depth for deep crawling (default: 3)
        #[arg(long, value_name = "DEPTH")]
        max_depth: Option<u32>,
        /// Maximum number of pages to crawl (default: no limit)
        #[arg(long, value_name = "PAGES")]
        max_pages: Option<u32>,
    },

    /// 📂 Manage indexed document sources
    Sources {
        #[command(subcommand)]
        command: SourceCommands,
    },

    /// 🔗 Open a specific documentation section by ID
    Open {
        /// Section ID from previous doc command output
        #[arg(value_name = "SECTION_ID")]
        id: String,
        /// Save opened section to file
        #[arg(short = 'o', long, value_name = "FILE")]
        output: Option<PathBuf>,
    },

    /// 🔄 Update Manx to the latest version from GitHub
    Update {
        /// Check for updates without installing
        #[arg(long)]
        check: bool,
        /// Force update even if already on latest version
        #[arg(long)]
        force: bool,
    },

    /// 🧠 Manage embedding models and providers for semantic search
    ///
    /// EMBEDDING PROVIDERS:
    ///   • hash: Hash-based embeddings (default, fast, lightweight)
    ///   • onnx:model: Local ONNX models (requires download)
    ///   • ollama:model: Ollama API (requires Ollama server)
    ///   • openai:model: OpenAI embeddings API (requires API key)
    ///   • huggingface:model: HuggingFace embeddings API (requires API key)
    ///   • custom:url: Custom endpoint API
    ///
    /// EXAMPLES:
    ///   manx embedding status                     # Show current provider and models
    ///   manx embedding set hash                   # Use hash-based (default)
    ///   manx embedding set onnx:all-MiniLM-L6-v2 # Use local ONNX model
    ///   manx embedding set ollama:nomic-embed-text # Use Ollama model
    ///   manx embedding download all-MiniLM-L6-v2  # Download ONNX model
    ///   manx embedding test "sample query"        # Test current embedding setup
    Embedding {
        #[command(subcommand)]
        command: EmbeddingCommands,
    },
}
381
// Subcommands under `manx cache`. Doc comments are the clap help strings.
#[derive(Subcommand)]
pub enum CacheCommands {
    /// Remove all cached documentation and free up disk space
    Clear,
    /// Display cache size, file count, and storage statistics  
    Stats,
    /// Show all currently cached libraries and their sizes
    List,
}
391
// Subcommands under `manx sources`. Doc comments are the clap help strings.
#[derive(Subcommand)]
pub enum SourceCommands {
    /// List all indexed document sources
    List,
    /// Add a document source to the index
    // NOTE(review): overlaps with the top-level `manx index` command, which
    // also accepts URLs (String) — this variant only takes a PathBuf. Confirm
    // whether both entry points should share one code path.
    Add {
        /// Path to document or directory
        path: PathBuf,
        /// Optional alias for the source
        #[arg(long)]
        id: Option<String>,
    },
    /// Clear all indexed documents
    Clear,
}
407
// Subcommands under `manx embedding`. Doc comments are the clap help strings.
#[derive(Subcommand)]
pub enum EmbeddingCommands {
    /// Show current embedding provider status and configuration
    Status,
    /// Set embedding provider (hash, onnx:model, ollama:model, openai:model, huggingface:model, custom:url)
    Set {
        /// Provider specification
        // Stringly-typed "scheme:value" spec; parsed/validated downstream.
        #[arg(value_name = "PROVIDER")]
        provider: String,
        /// API key for API-based providers
        #[arg(long, value_name = "API_KEY")]
        api_key: Option<String>,
        /// Custom endpoint URL (for custom provider)
        #[arg(long, value_name = "URL")]
        endpoint: Option<String>,
        /// Embedding dimension (default: 384)
        #[arg(long, value_name = "DIMENSION")]
        dimension: Option<usize>,
    },
    /// Download and install a local ONNX model
    Download {
        /// Model name to download (e.g., 'all-MiniLM-L6-v2')
        #[arg(value_name = "MODEL_NAME")]
        model: String,
        /// Force redownload if model already exists
        #[arg(long)]
        force: bool,
    },
    /// List available models for download or installed models
    List {
        /// List available models for download instead of installed models
        #[arg(long)]
        available: bool,
    },
    /// Test current embedding setup with a sample query
    Test {
        /// Query text to test embeddings with
        #[arg(value_name = "QUERY")]
        query: String,
        /// Show detailed embedding vector information
        #[arg(long)]
        verbose: bool,
    },
    /// Remove downloaded local models
    Remove {
        /// Model name to remove
        #[arg(value_name = "MODEL_NAME")]
        model: String,
    },
}
458
459impl Cli {
460    pub fn parse_args() -> Self {
461        Cli::parse()
462    }
463}
464
465fn get_version_info() -> &'static str {
466    concat!(
467        "\n",
468        "__| |__________________________________________________________________________| |__\n",
469        "__   __________________________________________________________________________   __\n",
470        "  | |                                                                          | |  \n",
471        "  | |       ███        ██████   ██████   █████████   ██████   █████ █████ █████| |  \n",
472        "  | |      ░░░██      ░░██████ ██████   ███░░░░░███ ░░██████ ░░███ ░░███ ░░███ | |  \n",
473        "  | | ██     ░░██      ░███░█████░███  ░███    ░███  ░███░███ ░███  ░░███ ███  | |  \n",
474        "  | |░░       ░░███    ░███░░███ ░███  ░███████████  ░███░░███░███   ░░█████   | |  \n",
475        "  | |          ██░     ░███ ░░░  ░███  ░███░░░░░███  ░███ ░░██████    ███░███  | |  \n",
476        "  | |         ██       ░███      ░███  ░███    ░███  ░███  ░░█████   ███ ░░███ | |  \n",
477        "  | | ██    ███        █████     █████ █████   █████ █████  ░░█████ █████ █████| |  \n",
478        "  | |░░    ░░░        ░░░░░     ░░░░░ ░░░░░   ░░░░░ ░░░░░    ░░░░░ ░░░░░ ░░░░░ | |  \n",
479        "__| |__________________________________________________________________________| |__\n",
480        "__   __________________________________________________________________________   __\n",
481        "  | |                                                                          | |  \n",
482        "\n",
483        "  v",
484        env!("CARGO_PKG_VERSION"),
485        " • blazing-fast docs finder\n"
486    )
487}