colgrep 1.5.1

Semantic code search powered by ColBERT
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
use std::path::PathBuf;

use clap::{Parser, Subcommand};

use crate::color::ColorChoice;

pub const MAIN_HELP: &str = "\
EXAMPLES:
    # Search for code semantically (auto-indexes if needed)
    colgrep \"function that handles user authentication\"
    colgrep \"error handling for database connections\"

    # Search in a specific directory
    colgrep \"API endpoints\" ./backend

    # Search in a specific file (or multiple files)
    colgrep \"error handling\" ./src/main.rs
    colgrep \"auth\" ./src/auth.rs ./src/login.rs

    # Search in multiple directories (results are merged by score)
    colgrep \"error handling\" ./src ./lib ./api
    colgrep -e \"Result\" \"error\" ./crate-a ./crate-b -k 20

    # Filter by file type (grep-like)
    colgrep \"parse config\" --include \"*.rs\"
    colgrep \"test helpers\" --include \"*_test.go\"

    # Hybrid search: grep first, then rank semantically
    colgrep \"usage\" -e \"async fn\"
    colgrep \"error\" -e \"panic!\" --include \"*.rs\"

    # List only matching files (like grep -l)
    colgrep -l \"database queries\"

    # Show full function/class content
    colgrep -c \"parse config\"
    colgrep --content \"authentication handler\" -k 5

    # Output as JSON for scripting
    colgrep --json \"authentication\" | jq '.[] | .unit.file'

    # Build or update index (without searching)
    colgrep init
    colgrep init ~/projects/myapp

    # Check index status
    colgrep status

    # Clear index
    colgrep clear
    colgrep clear --all

    # Change default model (clears existing indexes)
    colgrep set-model lightonai/LateOn-Code-edge
    HF_TOKEN=hf_xxx colgrep set-model myorg/private-model

    # Update to the latest version
    colgrep update

SUPPORTED LANGUAGES:
    Python, Rust, TypeScript, JavaScript, Go, Java, C, C++, C#, Ruby,
    PHP, Swift, Kotlin, Scala, Lua, Elixir, Haskell, OCaml, QML, R, Zig,
    Julia, Shell/Bash, SQL, Markdown, Plain text

ENVIRONMENT:
    Indexes are stored in ~/.local/share/colgrep/ (or $XDG_DATA_HOME/colgrep)
    Config is stored in ~/.config/colgrep/ (or $XDG_CONFIG_HOME/colgrep)";

pub const SEARCH_HELP: &str = "\
EXAMPLES:
    # Basic semantic search
    colgrep search \"function that handles authentication\"
    colgrep search \"error handling\" ./backend

    # Filter by file type
    colgrep search \"parse config\" --include \"*.rs\"
    colgrep search \"API handler\" --include \"*.go\"

    # Hybrid search (grep + semantic ranking)
    colgrep search \"usage\" -e \"async fn\"
    colgrep search \"error\" -e \"Result<\" --include \"*.rs\"

    # List only matching files
    colgrep search -l \"database operations\"

    # Show full function/class content
    colgrep search -c \"parse config\"
    colgrep search --content \"handler\" -k 5

    # More results
    colgrep search -k 20 \"logging utilities\"

    # JSON output for scripting
    colgrep search --json \"auth\" | jq '.[0].unit.name'

GREP COMPATIBILITY:
    -r, --recursive    Enabled by default (for grep users)
    -l, --files-only   Show only filenames, like grep -l
    -c, --content      Show syntax-highlighted content (up to 50 lines)
    -e, --pattern      Pre-filter with regex pattern (ERE syntax by default)
    -E, --extended-regexp  Kept for compatibility (regex is now default)
    -F, --fixed-strings    Interpret -e pattern as literal string (no regex)
    -w, --word-regexp      Match whole words only for -e pattern
    --include          Filter files by glob pattern
    --exclude          Exclude files matching pattern
    --exclude-dir      Exclude directories (supports glob patterns: vendor, */plugins, **/test_*)";

pub const STATUS_HELP: &str = "\
EXAMPLES:
    colgrep status
    colgrep status ~/projects/myapp";

pub const CLEAR_HELP: &str = "\
EXAMPLES:
    # Clear index for current directory
    colgrep clear

    # Clear index for specific project
    colgrep clear ~/projects/myapp

    # Clear ALL indexes
    colgrep clear -a
    colgrep clear --all";

pub const SET_MODEL_HELP: &str = "\
EXAMPLES:
    # Set default model (public HuggingFace model)
    colgrep set-model lightonai/LateOn-Code-edge

    # Use a different public model
    colgrep set-model colbert-ir/colbertv2.0

    # Use a private HuggingFace model (requires HF_TOKEN)
    HF_TOKEN=hf_xxx colgrep set-model myorg/private-model

    # Use a local model directory
    colgrep set-model /path/to/local/model

AUTHENTICATION:
    For private HuggingFace models, set your token via environment variable:
    • HF_TOKEN=hf_xxx colgrep set-model org/private-model
    • export HF_TOKEN=hf_xxx && colgrep set-model org/private-model

    Token priority: HF_TOKEN > HUGGING_FACE_HUB_TOKEN > ~/.cache/huggingface/token

NOTES:
    • Changing models clears all existing indexes (embedding dimensions differ)
    • The model is downloaded from HuggingFace and cached automatically
    • Model preference is stored in ~/.config/colgrep/config.json
    • Use 'colgrep settings' to view the current model";

pub const INIT_HELP: &str = "\
EXAMPLES:
    # Build or update index for the current directory
    colgrep init

    # Build or update index for a specific project
    colgrep init ~/projects/myapp

    # Force auto-confirm for large codebases
    colgrep init -y

    # Use a specific model
    colgrep init --model lightonai/LateOn-Code-edge

    # Override model/session batch size for this run
    colgrep init --batch-size 2

    # Override outer encoding batch size for benchmarking/tuning
    colgrep init --encode-batch-size 1024

    # Override outer index chunk size for benchmarking/tuning
    colgrep init --index-chunk-size 4096

    # Compare batch sort orders during encoding
    colgrep init --batch-sort-order big-first
    colgrep init --batch-sort-order small-first

NOTES:
    • Creates a new index if none exists
    • Incrementally updates the index if files changed
    • Useful for pre-warming the index before searching
    • Subsequent searches will be fast since the index is already built";

pub const CONFIG_HELP: &str = "\
EXAMPLES:
    # Show current configuration
    colgrep settings

    # Set default number of results
    colgrep settings --k 20

    # Enable verbose output by default (full content with syntax highlighting)
    colgrep settings --verbose

    # Disable verbose output (compact filepath:lines format, this is the default)
    colgrep settings --no-verbose

    # Switch to INT8 quantized model (faster inference)
    colgrep settings --int8

    # Switch back to full-precision (FP32) model (default)
    colgrep settings --fp32

    # Set embedding pool factor (smaller index, faster search)
    colgrep settings --pool-factor 2

    # Disable embedding pooling (larger index, more precise)
    colgrep settings --pool-factor 1

    # Set parallel encoding sessions (default: auto-detected CPU count)
    colgrep settings --parallel 8

    # Set batch size per session (default: 1)
    colgrep settings --batch-size 2

    # Set parser recursion guard depth (default: 1024)
    colgrep settings --max-recursion-depth 1024

    # Set both at once
    colgrep settings --k 25 --n 8

    # Reset to defaults (unset)
    colgrep settings --k 0 --n 0

    # Add extra ignore patterns (on top of built-in defaults)
    colgrep settings --ignore generated --ignore \"*.pb.go\"

    # Remove an extra ignore pattern
    colgrep settings --no-ignore generated

    # Clear all extra ignore patterns (revert to defaults only)
    colgrep settings --clear-ignore

    # Force-include files/dirs that are normally ignored
    colgrep settings --force-include .vscode --force-include vendor/internal

    # Remove a force-include pattern
    colgrep settings --no-force-include .vscode

    # Clear all force-include patterns
    colgrep settings --clear-force-include

    # Show absolute paths in search output
    colgrep settings --no-relative-paths

    # Show relative paths in search output (default, saves tokens for LLM usage)
    colgrep settings --relative-paths

NOTES:
    • Values are stored in ~/.config/colgrep/config.json
    • Use 0 to reset a value to its default
    • These values override the CLI defaults when not explicitly specified
    • Default output is compact (filepath:lines). Use -v or --verbose for full content
    • FP32 (full-precision) is the default
    • Pool factor 2 (default) reduces index size by ~50%. Use 1 to disable pooling
    • Parallel sessions default to CPU count. Batch-size 1 (default) maximizes throughput
    • Parser recursion depth defaults to 1024. Increase only if needed for deep ASTs
    • Extra ignore patterns add to the built-in defaults (node_modules, .git, target, etc.)
    • Force-include patterns override both built-in and extra ignore rules
    • Patterns match directory/file names (e.g., \".vscode\") or path prefixes (e.g., \"vendor/internal\")
    • Suffix patterns with * are supported (e.g., \"*.pb.go\")
    • Use --relative-paths to display paths relative to the current directory (saves ~35% tokens for LLM usage)";

#[derive(Parser)]
#[command(
    name = "colgrep",
    version,
    about = "Semantic grep - find code by meaning, not just text",
    long_about = "Semantic grep - find code by meaning, not just text.\n\n\
        colgrep is grep that understands what you're looking for. Search with\n\
        natural language like \"error handling logic\" and find relevant code\n\
        even when keywords don't match exactly.\n\n\
        • Hybrid search: grep + semantic ranking with -e flag\n\
        • Natural language queries\n\
        • 18+ languages supported\n\
        • Incremental indexing",
    after_help = MAIN_HELP,
    args_conflicts_with_subcommands = true
)]
pub struct Cli {
    #[command(subcommand)]
    pub command: Option<Commands>,

    // Default search arguments (when no subcommand is provided)
    /// Natural language query (runs search by default)
    #[arg(value_name = "QUERY")]
    pub query: Option<String>,

    /// Files or directories to search in (default: current directory)
    #[arg(value_name = "PATH")]
    pub paths: Vec<PathBuf>,

    /// Number of results (default: 15, or 10 if -n is used)
    #[arg(short = 'k', long = "results")]
    pub top_k: Option<usize>,

    /// ColBERT model HuggingFace ID or local path (uses saved preference if not specified)
    #[arg(long)]
    pub model: Option<String>,

    /// Output as JSON
    #[arg(long)]
    pub json: bool,

    /// Search recursively (default behavior, for grep compatibility)
    #[arg(short = 'r', long)]
    pub recursive: bool,

    /// Filter: search only files matching pattern (e.g., "*.py", "*.rs")
    #[arg(long = "include", value_name = "PATTERN")]
    pub include_patterns: Vec<String>,

    /// List files only: show only filenames, not the matching code
    #[arg(short = 'l', long = "files-only")]
    pub files_only: bool,

    /// Show function/class content with syntax highlighting (up to 50 lines)
    #[arg(short = 'c', long = "content")]
    pub show_content: bool,

    /// Number of context lines to show (default: 6, or config value)
    #[arg(short = 'n', long = "lines")]
    pub context_lines: Option<usize>,

    /// Text pattern: pre-filter using regex, then rank with semantic search
    #[arg(short = 'e', long = "pattern", value_name = "PATTERN")]
    pub text_pattern: Option<String>,

    /// Use extended regular expressions (ERE) for -e pattern (now default, kept for compatibility)
    #[arg(short = 'E', long = "extended-regexp")]
    pub extended_regexp: bool,

    /// Interpret -e pattern as fixed string, not regex (disables default regex mode)
    #[arg(short = 'F', long = "fixed-strings")]
    pub fixed_strings: bool,

    /// Match whole words only for -e pattern
    #[arg(short = 'w', long = "word-regexp")]
    pub word_regexp: bool,

    /// Make `-e` pattern matching case-sensitive. By default, colgrep
    /// matches case-insensitively (grep -i behaviour) — pass this flag to
    /// match the pattern exactly as typed.
    #[arg(short = 's', long = "case-sensitive")]
    pub case_sensitive: bool,

    /// Exclude files matching pattern (can be repeated)
    #[arg(long = "exclude", value_name = "PATTERN")]
    pub exclude_patterns: Vec<String>,

    /// Exclude directories - supports literal names and glob patterns (can be repeated)
    /// Examples: vendor, node_modules, */plugins, **/test_*, .claude/*
    #[arg(long = "exclude-dir", value_name = "DIR")]
    pub exclude_dirs: Vec<String>,

    /// Show statistics for all indexes
    #[arg(long)]
    pub stats: bool,

    /// Reset search statistics for all indexes
    #[arg(long)]
    pub reset_stats: bool,

    /// Only search code files, skip text/config files (md, txt, yaml, json, toml, etc.)
    #[arg(long)]
    pub code_only: bool,

    /// Disable FTS5 hybrid search (use pure semantic search only)
    #[arg(long = "semantic-only")]
    pub no_fts: bool,

    /// Hybrid search alpha: balance between keyword (0.0) and semantic (1.0). Default: 0.60.
    #[arg(long, value_name = "FLOAT")]
    pub alpha: Option<f32>,

    /// Install colgrep as a plugin for Claude Code
    #[arg(long = "install-claude-code")]
    pub install_claude_code: bool,

    /// Uninstall colgrep plugin from Claude Code
    #[arg(long = "uninstall-claude-code")]
    pub uninstall_claude_code: bool,

    /// Install colgrep for OpenCode
    #[arg(long = "install-opencode")]
    pub install_opencode: bool,

    /// Uninstall colgrep from OpenCode
    #[arg(long = "uninstall-opencode")]
    pub uninstall_opencode: bool,

    /// Install colgrep for Codex
    #[arg(long = "install-codex")]
    pub install_codex: bool,

    /// Uninstall colgrep from Codex
    #[arg(long = "uninstall-codex")]
    pub uninstall_codex: bool,

    /// Install colgrep for Hermes
    #[arg(long = "install-hermes")]
    pub install_hermes: bool,

    /// Uninstall colgrep from Hermes
    #[arg(long = "uninstall-hermes")]
    pub uninstall_hermes: bool,

    /// Completely uninstall colgrep: remove from all AI tools, clear all indexes, and remove all data
    #[arg(long = "uninstall")]
    pub uninstall: bool,

    /// Internal: Claude Code session hook (outputs JSON reminder)
    #[arg(long = "session-hook", hide = true)]
    pub session_hook: bool,

    /// Internal: Claude Code task hook (outputs JSON reminder for agent prompts)
    #[arg(long = "task-hook", hide = true)]
    pub task_hook: bool,

    /// Disable embedding pooling (use full embeddings, slower but more precise)
    #[arg(long = "no-pool")]
    pub no_pool: bool,

    /// Set embedding pool factor (default: 2, higher = fewer embeddings = faster)
    #[arg(long = "pool-factor", value_name = "FACTOR")]
    pub pool_factor: Option<usize>,

    /// Automatically confirm indexing without prompting (for large codebases > 10K code units)
    #[arg(short = 'y', long = "yes")]
    pub auto_confirm: bool,

    /// When to colorize output and syntax highlighting: auto, always, or never.
    /// `never` emits plain text with no ANSI escape sequences (useful for agents/pipes).
    #[arg(
        long = "color",
        value_name = "WHEN",
        default_value = "auto",
        global = true
    )]
    pub color: ColorChoice,

    /// Force CPU execution for all runtime paths
    #[arg(long = "force-cpu", global = true, conflicts_with = "force_gpu")]
    pub force_cpu: bool,

    /// Force GPU execution for all runtime paths and fail if unavailable
    #[arg(long = "force-gpu", global = true, conflicts_with = "force_cpu")]
    pub force_gpu: bool,
}

#[derive(Subcommand)]
pub enum Commands {
    /// Search for code semantically (auto-indexes if needed)
    #[command(after_help = SEARCH_HELP)]
    Search {
        /// Natural language query (optional if -e pattern is provided)
        query: Option<String>,

        /// Files or directories to search in (default: current directory)
        #[arg(value_name = "PATH")]
        paths: Vec<PathBuf>,

        /// Number of results (default: 15, or 10 if -n is used)
        #[arg(short = 'k', long = "results")]
        top_k: Option<usize>,

        /// ColBERT model HuggingFace ID or local path (uses saved preference if not specified)
        #[arg(long)]
        model: Option<String>,

        /// Output as JSON
        #[arg(long)]
        json: bool,

        /// Search recursively (default behavior, for grep compatibility)
        #[arg(short = 'r', long)]
        recursive: bool,

        /// Filter: search only files matching pattern (e.g., "*.py", "*.rs")
        #[arg(long = "include", value_name = "PATTERN")]
        include_patterns: Vec<String>,

        /// List files only: show only filenames, not the matching code
        #[arg(short = 'l', long = "files-only")]
        files_only: bool,

        /// Show full function/class content instead of just signature
        #[arg(short = 'c', long = "content")]
        show_content: bool,

        /// Number of context lines to show (default: 6, or config value)
        #[arg(short = 'n', long = "lines")]
        context_lines: Option<usize>,

        /// Text pattern: pre-filter using regex, then rank with semantic search
        #[arg(short = 'e', long = "pattern", value_name = "PATTERN")]
        text_pattern: Option<String>,

        /// Use extended regular expressions (ERE) for -e pattern (now default, kept for compatibility)
        #[arg(short = 'E', long = "extended-regexp")]
        extended_regexp: bool,

        /// Interpret -e pattern as fixed string, not regex (disables default regex mode)
        #[arg(short = 'F', long = "fixed-strings")]
        fixed_strings: bool,

        /// Match whole words only for -e pattern
        #[arg(short = 'w', long = "word-regexp")]
        word_regexp: bool,

        /// Match -e pattern case-sensitively (default is case-insensitive)
        #[arg(short = 's', long = "case-sensitive")]
        case_sensitive: bool,

        /// Exclude files matching pattern (can be repeated)
        #[arg(long = "exclude", value_name = "PATTERN")]
        exclude_patterns: Vec<String>,

        /// Exclude directories - supports literal names and glob patterns (can be repeated)
        /// Examples: vendor, node_modules, */plugins, **/test_*, .claude/*
        #[arg(long = "exclude-dir", value_name = "DIR")]
        exclude_dirs: Vec<String>,

        /// Only search code files, skip text/config files (md, txt, yaml, json, toml, etc.)
        #[arg(long)]
        code_only: bool,

        /// Disable FTS5 hybrid search (use pure semantic search only)
        #[arg(long = "semantic-only")]
        no_fts: bool,

        /// Hybrid search alpha: balance between keyword (0.0) and semantic (1.0). Default: 0.60.
        #[arg(long, value_name = "FLOAT")]
        alpha: Option<f32>,

        /// Disable embedding pooling (use full embeddings, slower but more precise)
        #[arg(long = "no-pool")]
        no_pool: bool,

        /// Set embedding pool factor (default: 2, higher = fewer embeddings = faster)
        #[arg(long = "pool-factor", value_name = "FACTOR")]
        pool_factor: Option<usize>,

        /// Automatically confirm indexing without prompting (for large codebases > 10K code units)
        #[arg(short = 'y', long = "yes")]
        auto_confirm: bool,

        /// Use strict batch-size batching instead of fixed dynamic GPU batching
        #[arg(long = "static-batch")]
        static_batch: bool,
    },

    /// Show index status for a project
    #[command(after_help = STATUS_HELP)]
    Status {
        /// Project directory
        #[arg(default_value = ".")]
        path: PathBuf,
    },

    /// Clear index data for a project or all projects
    #[command(after_help = CLEAR_HELP)]
    Clear {
        /// Project directory (default: current directory)
        #[arg(default_value = ".")]
        path: PathBuf,

        /// Clear all indexes for all projects
        #[arg(short = 'a', long)]
        all: bool,
    },

    /// Set the default ColBERT model to use for indexing and search
    #[command(after_help = SET_MODEL_HELP)]
    SetModel {
        /// HuggingFace model ID (e.g., "lightonai/LateOn-Code-edge")
        model: String,
    },

    /// Update colgrep to the latest version
    Update,

    /// Build or update the index without searching
    #[command(after_help = INIT_HELP)]
    Init {
        /// Project directory (default: current directory)
        #[arg(default_value = ".")]
        path: PathBuf,

        /// ColBERT model HuggingFace ID or local path (uses saved preference if not specified)
        #[arg(long)]
        model: Option<String>,

        /// Disable embedding pooling (use full embeddings, slower but more precise)
        #[arg(long = "no-pool")]
        no_pool: bool,

        /// Set embedding pool factor (default: 2, higher = fewer embeddings = faster)
        #[arg(long = "pool-factor", value_name = "FACTOR")]
        pool_factor: Option<usize>,

        /// Automatically confirm indexing without prompting (for large codebases > 10K code units)
        #[arg(short = 'y', long = "yes")]
        auto_confirm: bool,

        /// Override model/session batch size for this run
        #[arg(long = "batch-size", value_name = "SIZE")]
        batch_size: Option<usize>,

        /// Override outer encoding batch size for this run
        #[arg(long = "encode-batch-size", value_name = "SIZE")]
        encode_batch_size: Option<usize>,

        /// Override outer index chunk size for this run
        #[arg(long = "index-chunk-size", value_name = "SIZE")]
        index_chunk_size: Option<usize>,

        /// Use strict batch-size batching instead of fixed dynamic GPU batching
        #[arg(long = "static-batch")]
        static_batch: bool,
    },

    /// View or set configuration options (default k, n values)
    #[command(name = "settings", after_help = CONFIG_HELP)]
    Settings {
        /// Set default number of results (use 0 to reset to default)
        #[arg(long = "k")]
        default_k: Option<usize>,

        /// Set default context lines (use 0 to reset to default)
        #[arg(long = "n")]
        default_n: Option<usize>,

        /// Use full-precision (FP32) model (default)
        #[arg(long, conflicts_with = "int8")]
        fp32: bool,

        /// Use INT8 quantized model (faster inference)
        #[arg(long, conflicts_with = "fp32")]
        int8: bool,

        /// Set default pool factor for embedding compression (use 0 to reset to default 2)
        /// Higher values = faster search, fewer embeddings. Use 1 to disable pooling.
        #[arg(long = "pool-factor", value_name = "FACTOR")]
        pool_factor: Option<usize>,

        /// Set number of parallel ONNX sessions for encoding (use 0 to reset to auto = CPU count)
        /// More sessions = faster encoding on multi-core systems.
        #[arg(long = "parallel", value_name = "SESSIONS")]
        parallel_sessions: Option<usize>,

        /// Set batch size per encoding session (use 0 to reset to default 1)
        /// Smaller batches work better with parallel sessions.
        #[arg(long = "batch-size", value_name = "SIZE")]
        batch_size: Option<usize>,

        /// Set parser recursion depth guard (use 0 to reset to default 1024)
        #[arg(long = "max-recursion-depth", value_name = "DEPTH")]
        max_recursion_depth: Option<usize>,

        /// Enable verbose output by default (show full content with syntax highlighting)
        #[arg(long)]
        verbose: bool,

        /// Disable verbose output (show compact filepath:lines format, this is the default)
        #[arg(long = "no-verbose", conflicts_with = "verbose")]
        no_verbose: bool,

        /// Show relative paths in search output (relative to current directory)
        #[arg(long = "relative-paths", conflicts_with = "no_relative_paths")]
        relative_paths: bool,

        /// Show absolute paths in search output (this is the default)
        #[arg(long = "no-relative-paths", conflicts_with = "relative_paths")]
        no_relative_paths: bool,

        /// Enable hybrid search (FTS5 keyword + ColBERT semantic fused with RRF, this is the default)
        #[arg(long = "hybrid-search", conflicts_with = "no_hybrid_search")]
        hybrid_search: bool,

        /// Disable hybrid search (use pure semantic search only)
        #[arg(long = "no-hybrid-search", conflicts_with = "hybrid_search")]
        no_hybrid_search: bool,

        /// Set hybrid search alpha: balance between keyword (0.0) and semantic (1.0).
        /// Default: 0.75. Use 0 to reset to default.
        #[arg(long, value_name = "FLOAT")]
        alpha: Option<f32>,

        /// Add patterns to ignore during indexing (on top of defaults)
        /// Can be repeated. Examples: --ignore generated --ignore "*.pb.go"
        #[arg(long = "ignore", value_name = "PATTERN")]
        add_ignore: Vec<String>,

        /// Remove patterns from the extra ignore list
        /// Can be repeated. Examples: --no-ignore generated
        #[arg(long = "no-ignore", value_name = "PATTERN")]
        remove_ignore: Vec<String>,

        /// Add patterns to force-include even if normally ignored
        /// Can be repeated. Examples: --force-include .vscode --force-include vendor/internal
        #[arg(long = "force-include", value_name = "PATTERN")]
        add_force_include: Vec<String>,

        /// Remove patterns from the force-include list
        /// Can be repeated. Examples: --no-force-include .vscode
        #[arg(long = "no-force-include", value_name = "PATTERN")]
        remove_force_include: Vec<String>,

        /// Clear all custom ignore patterns (revert to defaults only)
        #[arg(long = "clear-ignore")]
        clear_ignore: bool,

        /// Clear all force-include patterns
        #[arg(long = "clear-force-include")]
        clear_force_include: bool,
    },
}