rag-rat 0.11.0

CLI and MCP entrypoint for indexing repositories into local source, graph, history, and memory evidence.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
//! Declarative command-line surface (clap derive). The parser owns `--help`/`-h`,
//! `--version`/`-V`, per-subcommand help, and flag validation — `main.rs` only dispatches on
//! the typed result. The global `--config` defaults to `rag-rat.toml` and may appear before or
//! after the subcommand.

use std::path::PathBuf;

use clap::{Args, Parser, Subcommand, ValueEnum};

// Top-level parser for the `rag-rat` binary: two global options plus the required subcommand.
// `--config` and `--json` are declared `global = true`, so clap accepts them either before or
// after the subcommand name.
// NOTE: on clap-derived items the `///` comments are rendered as `--help` text, so editing them
// changes user-visible output. Maintainer-only remarks belong in `//` comments like this one.
#[derive(Debug, Parser)]
#[command(
    name = "rag-rat",
    version,
    about = "Local repo-intelligence index, graph, history, and memory — CLI + MCP server.",
    propagate_version = true
)]
pub(crate) struct Cli {
    /// Path to the rag-rat.toml config (relative to the current directory).
    #[arg(long, global = true, default_value = "rag-rat.toml")]
    pub config: String,

    /// Emit JSON instead of the default TOON (Token-Oriented Object Notation). TOON is denser for
    /// LLM consumers; pass --json when a JSON parser must read the output. For commands that print
    /// a human summary by default (`reconcile --plan`, `eval`, `memory doctor`), --json also
    /// selects their structured output.
    #[arg(long, global = true)]
    pub json: bool,

    // The selected subcommand; the field is not optional, so a bare `rag-rat` is a parse error.
    #[command(subcommand)]
    pub command: Command,
}

// Every top-level subcommand. Tuple variants carry their own `*Args` struct (defined below);
// unit variants (`ClaudeHook`, `Doctor`, `Mcp`, `Gc`, `DumpConfig`, `VersionCheck`) take no
// subcommand-specific flags. `Eval` and `BenchmarkEmbedding` exist only when the crate is built
// with the `eval` feature. Variant order is the order clap lists subcommands in `--help`.
#[derive(Debug, Subcommand)]
pub(crate) enum Command {
    /// Scan the repository and write a starter rag-rat.toml (interactive).
    Init(InitArgs),

    /// Internal: Claude Code hook entrypoint (reads a JSON event on stdin).
    #[command(hide = true)]
    ClaudeHook,

    /// Index the repository (default: changed files only).
    Index(IndexArgs),

    /// Report schema, storage, discovery, targets, and index health as JSON.
    Doctor,

    /// Search the index (lexical + semantic).
    Query(QueryArgs),

    /// Repo orientation brief (spine / churn / god-modules / ownership).
    Brief(BriefArgs),

    /// Ownership / co-change clusters.
    Clusters(ClustersArgs),

    /// Rank the most load-bearing symbols by weighted PageRank over the edge graph.
    ImportantSymbols(ImportantSymbolsArgs),

    /// List candidate clone classes ranked by refactor ROI.
    Clones(ClonesArgs),

    /// Reverse-lookup: show the clone class containing a given symbol (if any).
    ClonesFor(ClonesForArgs),

    /// Run the stdio MCP server.
    Mcp,

    /// Inspect and re-anchor source-anchored repo memories.
    Memory(MemoryArgs),

    /// Dream-mode memory-maintenance worklist (#122): deterministic coverage-gap + stale-reference
    /// findings written to `dream_findings`. Surfaces findings ABOUT memories; never mutates them.
    Dream(DreamArgs),

    /// GitHub papertrail sync.
    Github(GithubArgs),

    /// Install / uninstall / inspect git hooks and Claude Code hooks.
    Hooks(HooksArgs),

    /// Bounded post-git-operation index maintenance (invoked by hooks).
    Maintenance(MaintenanceArgs),

    /// List or install on-device embedding models.
    Models(ModelsArgs),

    /// Compute or refresh embeddings for indexed chunks.
    Reconcile(ReconcileArgs),

    /// Garbage-collect index rows for dead git contexts.
    Gc,

    /// Run the search-quality eval suite (CI gate; requires the `eval` build feature).
    #[cfg(feature = "eval")]
    Eval(EvalArgs),

    /// Benchmark ephemeral remote embedding throughput across concurrency candidates, emitting
    /// per-candidate texts/s as JSON (requires the `eval` build feature). Provisions an ephemeral
    /// cookbook box, runs the sweep, and tears it down.
    #[cfg(feature = "eval")]
    BenchmarkEmbedding(BenchmarkEmbeddingArgs),

    /// SCIP-oracle pass: compiler-grade edge resolution from a language indexer.
    Oracle(OracleArgs),

    /// Print the resolved configuration as JSON.
    DumpConfig,

    /// Check crates.io for a newer published rag-rat, refresh the cache, and print current vs
    /// latest.
    VersionCheck,
}

// Flags for the `init` subcommand (`Command::Init`). All three are plain boolean switches that
// default to `false` when absent; `--yes` additionally has the short form `-y`.
#[derive(Debug, Args)]
pub(crate) struct InitArgs {
    /// Print the rendered config to stdout without writing anything.
    #[arg(long)]
    pub dry_run: bool,
    /// Accept all defaults non-interactively.
    #[arg(long, short = 'y')]
    pub yes: bool,
    /// Overwrite an existing config without prompting.
    #[arg(long)]
    pub force: bool,
}

#[derive(Debug, Args)]
pub(crate) struct IndexArgs {
    /// Full rebuild from scratch.
    #[arg(long)]
    pub full: bool,
    /// Re-discover all target files (additive), then index changed ones.
    #[arg(long)]
    pub discover: bool,
    /// Index only changed files (the default).
    #[arg(long)]
    pub changed: bool,
    /// Index a LINKED git worktree's branch overlay on top of the existing base index, so queries
    /// scoped to it (`--worktree` / the MCP `worktree` arg) see that branch's changes. Indexes
    /// only the delta vs the base; does not rebuild the base.
    #[arg(long, value_name = "PATH")]
    pub worktree: Option<std::path::PathBuf>,
    /// Run the background file watcher in the foreground until interrupted.
    #[arg(long)]
    pub watch: bool,
}

// Arguments for the `query` subcommand (`Command::Query`).
// `query` is a required positional that collects one or more words (`num_args = 1..`); the
// parser keeps them as separate strings, so any joining happens in the caller.
// NOTE(review): the `--explain` help says "instead of JSON results", while the global `--json`
// help states TOON is the default render — the wording may be stale; confirm before changing it
// (it is user-visible help text).
#[derive(Debug, Args)]
pub(crate) struct QueryArgs {
    /// Show the ranking explanation instead of JSON results.
    #[arg(long)]
    pub explain: bool,
    /// The search string (multiple words are joined).
    #[arg(required = true, num_args = 1.., value_name = "QUERY")]
    pub query: Vec<String>,
}

// Flags for the `brief` subcommand (`Command::Brief`).
// `mode` is accepted as a free-form string here: the parser does not restrict it to the values
// named in the help text, so validation (if any) happens downstream.
// `mode` and `limit` are `None` when omitted; the defaults are chosen by the caller.
#[derive(Debug, Args)]
pub(crate) struct BriefArgs {
    /// Brief mode: spine, churn, god_modules, ownership.
    #[arg(long)]
    pub mode: Option<String>,
    /// Max rows to return.
    #[arg(long)]
    pub limit: Option<u32>,
    /// Include generated files.
    #[arg(long)]
    pub include_generated: bool,
    /// Omit drive-by repo memories.
    #[arg(long)]
    pub no_memories: bool,
}

// Flags for the `important-symbols` subcommand (`Command::ImportantSymbols`).
// `--personalize` is a `Vec` with `value_delimiter = ','`, so values may be given as one
// comma-separated list or by repeating the flag; an omitted flag yields an empty vector.
#[derive(Debug, Args)]
pub(crate) struct ImportantSymbolsArgs {
    /// Max load-bearing symbols to return.
    #[arg(long)]
    pub limit: Option<u32>,
    /// Symbols to bias importance toward (the symbols you're working on) — names, refs
    /// (path::name), or sym_<hex> handles, comma-separated or repeated. A sym_<hex> handle
    /// resolves to its logical symbol's members; otherwise the entry is resolved by ref then
    /// name (ambiguous/missing entries are skipped). Raw numeric symbol ids are NOT accepted —
    /// they are reindex-churned rowids (#149). Empty = global importance (the CLI is
    /// global-by-default — it never auto-seeds from the git diff).
    #[arg(long, value_delimiter = ',')]
    pub personalize: Vec<String>,
}

// Flags for the `dream` subcommand (`Command::Dream`). The single optional `--limit` is `None`
// when omitted, leaving the cap to the caller.
#[derive(Debug, Args)]
pub(crate) struct DreamArgs {
    /// Max coverage-gap findings to surface (stale-reference findings are always all reported).
    #[arg(long)]
    pub limit: Option<u32>,
}

// Flags for the `clones` subcommand (`Command::Clones`).
// The parser only types the values: the ranges and defaults quoted in the help text (similarity
// bounds, default copies, refine budget) are not declared as clap constraints or defaults here,
// so they are enforced downstream. Likewise the output-mode switches (`--explain`,
// `--recall-signature`, `--recall-symbols`, `--precompute`) are not declared mutually exclusive
// at the parser level.
#[derive(Debug, Args)]
pub(crate) struct ClonesArgs {
    /// Minimum pairwise overlap/max_len similarity. Valid range [0.5, 1.0] (default: 0.7, the θ
    /// threshold); out-of-range values are rejected.
    #[arg(long)]
    pub min_similarity: Option<f64>,
    /// Minimum number of copies for a class to be returned (default: 2).
    #[arg(long)]
    pub min_copies: Option<usize>,
    /// Maximum number of clone classes to return, sorted by ROI descending. A supplied limit is
    /// capped at the refine budget (currently 50): --limit N returns at most 50 classes, all
    /// refined. Omit the flag to retrieve all classes (only the top 50 refined).
    #[arg(long)]
    pub limit: Option<usize>,
    /// Print a human-readable explanation (template + variation points + proposed signature) for
    /// the refined class with this key (from the `class_key` field of a prior `clones` run)
    /// instead of the JSON/TOON listing.
    #[arg(long, value_name = "CLASS_KEY")]
    pub explain: Option<String>,
    /// Print a canonical, cross-build-stable RECALL signature instead of the listing: one sorted
    /// line per clone class (`<member_count>\t<sorted member refs>`), keyed on `path::symbol` refs
    /// (not rowids). This is the recall half of the clone measurement harness (#279): dump it on
    /// two builds (e.g. before/after a candidate-pruning change like #271's hot-token cap) and
    /// `diff` them — a removed or shrunk line is a recall regression. Forces a complete pass
    /// (ignores `--limit`).
    #[arg(long)]
    pub recall_signature: bool,
    /// Print the SORTED, UNCAPPED set of clone-symbol refs (one `path::symbol` per line) — every
    /// symbol that is in any coherent clone class. The SYMBOL-level recall signal for the #279
    /// harness, and the one to use when a change alters clustering granularity: unlike
    /// `--recall-signature` (class lines, capped at the per-class member limit), this counts every
    /// member of every class, so `diff`-ing two builds catches a symbol that stopped being a clone
    /// without false alarms from the member cap. Ignores `--limit`.
    #[arg(long)]
    pub recall_symbols: bool,
    /// Precompute + persist the clone-edge graph (a background-style writer pass), so subsequent
    /// `find_clones` / `clones-for` queries read the persisted graph instead of recomputing the
    /// super-linear candidate pairs every call — the way the graph scales to large repos (#286).
    /// Runs to completion under a write lock; re-running on unchanged content is a no-op. Prints a
    /// build report instead of the clone listing.
    #[arg(long)]
    pub precompute: bool,
    /// Soft per-pass time budget (seconds) for `--precompute`; the build checkpoints and resumes,
    /// so a bound leaves a partial graph that the next pass continues. Omit to run
    /// uninterrupted.
    #[arg(long, value_name = "SECONDS")]
    pub max_seconds: Option<u64>,
}

/// Selector for `clones-for`: positional `SYMBOL` (a qualified ref or `sym_<hex>` handle), or
/// `--path` + `--line` for a location-based lookup. Exactly one of these forms is required.
// NOTE(review): the "exactly one form" rule is not expressed to clap — all three fields are
// independent `Option`s with no `requires`/`conflicts_with`/group — so the parser accepts none,
// all, or a lone `--path`/`--line`. Presumably the dispatcher validates the combination; confirm.
#[derive(Debug, Args)]
pub(crate) struct ClonesForArgs {
    /// Qualified symbol reference (`path/to/file.rs::fn_name`) or a `sym_<hex>` handle.
    #[arg(value_name = "SYMBOL")]
    pub symbol: Option<String>,
    /// File path for a PathLine lookup (requires --line).
    #[arg(long, value_name = "PATH")]
    pub path: Option<String>,
    /// Line number for a PathLine lookup (requires --path).
    #[arg(long, value_name = "N")]
    pub line: Option<i64>,
}

// Flags for the `clusters` subcommand (`Command::Clusters`). The numeric options are `None` when
// omitted (defaults are applied by the caller); the two switches default to `false`.
// `--include-generated` and `--no-memories` mirror the same-named flags on `BriefArgs`.
#[derive(Debug, Args)]
pub(crate) struct ClustersArgs {
    /// Max clusters to return.
    #[arg(long)]
    pub limit: Option<u32>,
    /// Minimum cluster size.
    #[arg(long)]
    pub min_cluster_size: Option<u32>,
    /// Include generated files.
    #[arg(long)]
    pub include_generated: bool,
    /// Omit drive-by repo memories.
    #[arg(long)]
    pub no_memories: bool,
}

// Flags for the `maintenance` subcommand (`Command::Maintenance`), which the help text describes
// as invoked by hooks. Every option is optional at the parser level.
// `trigger` and `branch_checkout` are carried as raw strings: the parser neither restricts the
// trigger names nor converts the git `1`/`0` flag to a bool, so interpretation is left to the
// caller.
#[derive(Debug, Args)]
pub(crate) struct MaintenanceArgs {
    /// What triggered this pass (manual, post-checkout, post-merge, ...).
    #[arg(long)]
    pub trigger: Option<String>,
    /// Soft time budget for the reconcile phase, in seconds.
    #[arg(long)]
    pub max_seconds: Option<u64>,
    /// git post-checkout flag: 1 = branch checkout, 0 = file checkout.
    #[arg(long)]
    pub branch_checkout: Option<String>,
    /// git post-checkout: previous HEAD.
    #[arg(long)]
    pub old_head: Option<String>,
    /// git post-checkout: new HEAD.
    #[arg(long)]
    pub new_head: Option<String>,
}

// Flags for the `reconcile` subcommand (`Command::Reconcile`). Numeric options are `None` when
// omitted, so their defaults live with the caller; switches default to `false`.
// No inter-flag constraints are declared here: the short-circuit behaviour that the
// `--reencode-vectors` help describes is implemented by the caller, not by the parser.
#[derive(Debug, Args)]
pub(crate) struct ReconcileArgs {
    /// Report the reconcile plan without computing embeddings.
    #[arg(long)]
    pub plan: bool,
    /// Cap on chunks to embed this pass.
    #[arg(long)]
    pub limit: Option<u32>,
    /// Embedding batch size.
    #[arg(long)]
    pub batch_size: Option<u32>,
    /// Recompute even up-to-date embeddings.
    #[arg(long)]
    pub force: bool,
    /// Keep going until no backlog remains.
    #[arg(long)]
    pub until_clean: bool,
    /// Embed changed files first.
    #[arg(long)]
    pub changed_first: bool,
    /// Soft time budget in seconds.
    #[arg(long)]
    pub max_seconds: Option<u64>,
    /// Truncate chunk text to this many chars before embedding.
    #[arg(long)]
    pub max_embedding_chars: Option<usize>,
    /// Force the legacy-f32 → int8 vector re-encode now (#312), ignoring the run-once meta gate.
    /// A format-only conversion (no model inference); idempotent — converts only rows still in
    /// f32. SHORT-CIRCUITS: re-encodes and exits, ignoring the other reconcile flags (no
    /// embeddings are computed). Honors `--max-seconds` (the conversion is bounded and resumes
    /// on a later run).
    #[arg(long)]
    pub reencode_vectors: bool,
}

// Flags for the `eval` subcommand (`Command::Eval`); compiled only with the `eval` feature.
// The three numeric knobs carry parser-level defaults (`replay_max_cases` = 200,
// `replay_max_files` = 20, `search_limit` = 10). The path options are `None` when omitted and
// the fallback locations named in the help text are resolved by the caller.
// `--replay-parent-state` is documented as implying `--replay`, but that implication is not
// declared to clap here, so the caller must apply it.
#[cfg(feature = "eval")]
#[derive(Debug, Args)]
pub(crate) struct EvalArgs {
    /// Path to the queries TOML (defaults to <root>/evals/queries.toml).
    #[arg(long)]
    pub queries: Option<PathBuf>,
    /// Path to the expected-hits TOML (defaults to <root>/evals/expected_hits.toml).
    #[arg(long)]
    pub expected: Option<PathBuf>,
    /// Rewrite the baseline from this run's results.
    #[arg(long)]
    pub update_baseline: bool,
    /// Optional pre-built `.scip` index to drive SCIP-oracle precision/recall metrics (#68).
    /// Defaults to <root>/evals/oracle.scip when present; absent → oracle metrics skipped.
    #[arg(long)]
    pub scip: Option<PathBuf>,
    /// Commit-replay eval (#120): generate cases from indexed git history (commit message = query,
    /// diff's changed paths = recall gold) instead of the static queries TOML.
    #[arg(long)]
    pub replay: bool,
    /// Max recent commits to turn into replay cases.
    #[arg(long, default_value_t = 200)]
    pub replay_max_cases: u32,
    /// Skip bulk/mechanical commits whose changed-file count exceeds this (recall noise).
    #[arg(long, default_value_t = 20)]
    pub replay_max_files: u32,
    /// Leakage-free replay: score each case against an index of its commit's PARENT state (a
    /// throwaway worktree + full reindex per case). Slower; the absolute headline number. Implies
    /// `--replay`.
    #[arg(long)]
    pub replay_parent_state: bool,
    /// Run searches with the graded-git rerank ON (#109): scores the SAME at-head index with
    /// `[search] graded_git_rerank` forced true, for an A/B against the default fuse. Applies to
    /// both the active and the hash-vector-baseline pass. Pair with `--replay` for the inner-loop
    /// dial (`rag-rat eval --replay --rerank`).
    #[arg(long)]
    pub rerank: bool,
    /// How many hits each search returns — the width of the candidate pool scored (#109). Default
    /// 10 (unchanged behavior). `recall@3`/`recall@10` stay FIXED top-3/top-10 cutoffs regardless;
    /// widening this only grows `recall_at_returned`, the candidate-recall ceiling. At 100 it
    /// measures recall@100 ≈ the candidate-generation ceiling — pure measurement, no search
    /// change.
    #[arg(long, default_value_t = 10)]
    pub search_limit: usize,
}

/// `benchmark-embedding` (#346): provision an ephemeral cookbook box and sweep embedding throughput
/// across concurrency candidates, emitting per-candidate texts/s as JSON. The PRIMARY output is
/// JSON regardless of the global render flag (the point of the command is machine-readable
/// backend/concurrency comparison).
// Compiled only with the `eval` feature. `cookbook` and `model` are plain (non-`Option`) fields
// with no default, so clap requires both flags. `--backend` defaults to "ollama" and is parsed
// by `parse_remote_backend`. `--candidates` accepts a comma-separated list (or a repeated flag)
// and is an empty vector when omitted.
#[cfg(feature = "eval")]
#[derive(Debug, Args)]
pub(crate) struct BenchmarkEmbeddingArgs {
    /// The ephemeral cookbook provider spec that provisions the on-demand box (e.g.
    /// `"@rag-rat/cookbook modal"`). Required — this command only benchmarks ephemeral boxes.
    #[arg(long)]
    pub cookbook: String,
    /// Which OpenAI-compatible backend to provision + benchmark (`ollama` | `infinity` | `vllm`).
    #[arg(long, default_value = "ollama", value_parser = parse_remote_backend)]
    pub backend: rag_rat_core::config::RemoteBackend,
    /// The server-side model to serve: an ollama model name (ollama backend) or a HuggingFace id
    /// (infinity/vLLM). Required. Off-registry HF models fall back to a measured dim (one probe
    /// embed) since they have no registry spec.
    #[arg(long)]
    pub model: String,
    /// GPU hint for the recipe (provider-specific, e.g. `A10G`/`T4`). Omit to let the recipe pick.
    #[arg(long)]
    pub gpu: Option<String>,
    /// Concurrency candidates to measure, comma-separated (e.g. `1,2,4,8,16,32`). Omit to sweep
    /// the tuner's default ladder (powers of two up to the config's concurrency cap, plus the
    /// cap).
    #[arg(long, value_delimiter = ',')]
    pub candidates: Vec<u32>,
    /// Total sweep budget in milliseconds (split across candidates). Omit for the tuner default.
    #[arg(long)]
    pub budget_ms: Option<u64>,
    /// Write the JSON report to this path instead of stdout.
    #[arg(long)]
    pub output: Option<PathBuf>,
}

/// Value parser behind `--backend` on `benchmark-embedding`.
///
/// Delegates to
/// [`RemoteBackend::from_db_str`](rag_rat_core::config::RemoteBackend::from_db_str) — the same
/// routine the config layer relies on — so a spelling is valid on the command line exactly when
/// it is valid in the config.
///
/// # Errors
///
/// Returns a message naming the rejected input and the expected spellings when `from_db_str`
/// does not recognise `s`; clap surfaces it as the argument error.
#[cfg(feature = "eval")]
fn parse_remote_backend(s: &str) -> Result<rag_rat_core::config::RemoteBackend, String> {
    match rag_rat_core::config::RemoteBackend::from_db_str(s) {
        Some(backend) => Ok(backend),
        None => Err(format!("unknown backend `{s}` (expected ollama, infinity, or vllm)")),
    }
}

// Wrapper for the `oracle` subcommand (`Command::Oracle`): it carries no flags of its own and
// only selects a nested `OracleCommand`, which is required.
#[derive(Debug, Args)]
pub(crate) struct OracleArgs {
    #[command(subcommand)]
    pub command: OracleCommand,
}

// Nested subcommands under `oracle` (`run`, `status`, `report`), each with its own args struct
// defined below.
#[derive(Debug, Subcommand)]
pub(crate) enum OracleCommand {
    /// Run an oracle pass: invoke the indexer (or consume a pre-built `.scip`) and write verdicts.
    Run(OracleRunArgs),
    /// Report oracle verdict counts + whether the indexer tool is installed.
    Status(OracleStatusArgs),
    /// Run the oracle for a declared corpus and emit its typed before/after resolution report
    /// (C2). Applies the corpus health gate: exits non-zero if the run falls outside thresholds.
    Report(OracleReportArgs),
}

// Flags for `oracle run`. `--tool` always has a value: it defaults to
// `OracleToolArg::RustAnalyzer` when omitted. `--scip` is `None` unless given.
#[derive(Debug, Args)]
pub(crate) struct OracleRunArgs {
    /// The oracle tool to use (default: rust-analyzer).
    #[arg(long, value_enum, default_value_t = OracleToolArg::RustAnalyzer)]
    pub tool: OracleToolArg,
    /// Consume a pre-built `.scip` index instead of invoking the tool. Deterministic; the tool
    /// need not be installed.
    #[arg(long)]
    pub scip: Option<PathBuf>,
}

// Flags for `oracle status`. Unlike `oracle run`, `--tool` has no default here: `None` means no
// tool filter was requested.
#[derive(Debug, Args)]
pub(crate) struct OracleStatusArgs {
    /// Report on one oracle tool only (default: every known tool).
    #[arg(long, value_enum)]
    pub tool: Option<OracleToolArg>,
}

// Flags for `oracle report`. `--corpus` is a plain `String` with no default, so clap requires
// it; the two path options are `None` when omitted and the fallback location named in the help
// text is resolved by the caller.
#[derive(Debug, Args)]
pub(crate) struct OracleReportArgs {
    /// The corpus id to report on (must match a `[[corpus]]` entry's `corpus_id`).
    #[arg(long)]
    pub corpus: String,
    /// Path to the corpus profiles file. Defaults to `<root>/tools/oracle-corpora.toml`.
    #[arg(long)]
    pub corpora: Option<PathBuf>,
    /// Consume a pre-built `.scip` instead of invoking the corpus's tool. Deterministic; the tool
    /// need not be installed.
    #[arg(long)]
    pub scip: Option<PathBuf>,
}

// CLI-facing selector for the oracle indexer tool, used by `--tool` on `oracle run` and
// `oracle status`. Each variant pins its accepted command-line spelling explicitly via
// `#[value(name = ...)]`. `OracleToolArg::core` converts a value to the core crate's
// `OracleTool`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
pub(crate) enum OracleToolArg {
    #[value(name = "rust-analyzer")]
    RustAnalyzer,
    #[value(name = "scip-clang")]
    ScipClang,
    #[value(name = "scip-python")]
    ScipPython,
    #[value(name = "scip-typescript")]
    ScipTypescript,
    #[value(name = "scip-java")]
    ScipJava,
}

impl OracleToolArg {
    /// Translate the CLI-facing tool selector into the core crate's `OracleTool`.
    ///
    /// The mapping is one-to-one by variant name. The match is exhaustive on purpose: a new
    /// `OracleToolArg` variant will not compile until it is given a core counterpart here.
    pub(crate) fn core(self) -> rag_rat_core::index::oracle::OracleTool {
        use rag_rat_core::index::oracle::OracleTool;

        match self {
            Self::RustAnalyzer => OracleTool::RustAnalyzer,
            Self::ScipClang => OracleTool::ScipClang,
            Self::ScipPython => OracleTool::ScipPython,
            Self::ScipTypescript => OracleTool::ScipTypescript,
            Self::ScipJava => OracleTool::ScipJava,
        }
    }
}

// Wrapper for the `memory` subcommand (`Command::Memory`): no flags of its own, only the
// required nested `MemoryCommand`.
#[derive(Debug, Args)]
pub(crate) struct MemoryArgs {
    #[command(subcommand)]
    pub command: MemoryCommand,
}

// Nested subcommands under `memory`. Fields without an `#[arg(...)]` attribute (`memory_id`) are
// required positionals; the rest are optional `--long` flags.
// `Rebind` declares no parser-level constraint between its anchor options (`--symbol`,
// `--symbol-path`, `--symbol-id`, `--path`, `--chunk`, `--dir`): clap accepts any combination,
// including none, so choosing/validating the target is left to the caller.
#[derive(Debug, Subcommand)]
pub(crate) enum MemoryCommand {
    /// List memories (optionally filtered by kind).
    List {
        // NOTE(review): no `///` here, so `--kind` shows no help text; the set of valid kinds is
        // not visible in this file.
        #[arg(long)]
        kind: Option<String>,
    },
    /// Show one memory by id.
    Show { memory_id: String },
    /// Report non-current anchors with rebind suggestions.
    Doctor,
    /// Re-anchor a memory to a symbol, path, or chunk.
    Rebind {
        memory_id: String,
        /// Symbol name (substring-matched); cfg-split groups resolve to one. Ambiguous names list
        /// `--symbol-id` choices — prefer `--symbol-path` for an exact qualified name.
        #[arg(long)]
        symbol: Option<String>,
        /// Exact qualified name (`path::name`) — what `memory doctor` suggests; cfg-split safe.
        #[arg(long)]
        symbol_path: Option<String>,
        /// Exact symbol id — the escape hatch when same-name symbols can't be told apart.
        #[arg(long)]
        symbol_id: Option<i64>,
        // NOTE(review): `--path` has no help text; presumably the file-path anchor mentioned in
        // the `Rebind` summary — confirm before documenting it for users.
        #[arg(long)]
        path: Option<String>,
        // NOTE(review): `--chunk` has no help text; presumably a chunk id anchor — confirm.
        #[arg(long)]
        chunk: Option<i64>,
        /// Directory anchor relative to the repo root (`""` for the repo root) — the area-level
        /// binding `dir`-bound memories use.
        #[arg(long)]
        dir: Option<String>,
    },
}

// Wrapper for the `github` subcommand (`Command::Github`): no flags of its own, only the
// required nested `GithubCommand`.
#[derive(Debug, Args)]
pub(crate) struct GithubArgs {
    #[command(subcommand)]
    pub command: GithubCommand,
}

// Nested subcommands under `github`; currently only `sync`.
// `--issue` is taken as a raw string: the `owner/repo#number` shape in the help text is not
// validated by the parser. No exclusivity between `--from-refs`, `--issue`, and `--offline` is
// declared here.
#[derive(Debug, Subcommand)]
pub(crate) enum GithubCommand {
    /// Sync issues/PRs into the papertrail.
    Sync {
        /// Sync only refs already mentioned in indexed source/commits.
        #[arg(long)]
        from_refs: bool,
        /// Sync a single issue/PR (owner/repo#number).
        #[arg(long)]
        issue: Option<String>,
        /// Do not hit the network; use cached evidence only.
        #[arg(long)]
        offline: bool,
    },
}

// Arguments for the `hooks` subcommand (`Command::Hooks`). `action` is a required positional
// restricted to the `HookAction` values.
// NOTE(review): the help describes `--global` as meaningful "With --claude", but no
// `requires = "claude"` is declared, so the parser accepts `--global` on its own; presumably the
// caller ignores or rejects it — confirm.
#[derive(Debug, Args)]
pub(crate) struct HooksArgs {
    /// install, uninstall, or status.
    #[arg(value_enum)]
    pub action: HookAction,
    /// Operate on Claude Code hooks (settings.json) instead of git hooks.
    #[arg(long)]
    pub claude: bool,
    /// With --claude: target ~/.claude/settings.json instead of ./.claude.
    #[arg(long)]
    pub global: bool,
}

// The action selected by the positional argument of `hooks` (`HooksArgs::action`).
// No explicit `#[value(name = ...)]` is given, so the accepted spellings come from clap's
// `ValueEnum` derive defaults; `HookAction::as_str` returns the matching lowercase words.
#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
pub(crate) enum HookAction {
    Install,
    Uninstall,
    Status,
}

impl HookAction {
    pub(crate) fn as_str(self) -> &'static str {
        match self {
            HookAction::Install => "install",
            HookAction::Uninstall => "uninstall",
            HookAction::Status => "status",
        }
    }
}

// Argument wrapper for the models command; the nested subcommand is optional.
// NOTE(review): `ModelsCommand::List` is documented as "the default", so a `None` here is
// presumably treated as `List` by the handler — confirm against the dispatch code.
// (Plain `//` comments on purpose: `///` text on clap-derived items can surface in `--help`.)
#[derive(Debug, Args)]
pub(crate) struct ModelsArgs {
    #[command(subcommand)]
    pub command: Option<ModelsCommand>,
}

// Subcommands carried by `ModelsArgs::command`: `List` takes no arguments, `Install` takes a
// single `model_id` string (no `long`/`short`, so positional).
// (Plain `//` comments on purpose: `///` text on clap-derived items can surface in `--help`.)
#[derive(Debug, Subcommand)]
pub(crate) enum ModelsCommand {
    /// List models and their install state (the default).
    List,
    /// Download and install a model by id. A `[llm.embedding.remote]` block in `rag-rat.toml`
    /// installs it over Ollama instead; otherwise it's a local install.
    Install { model_id: String },
}

#[cfg(test)]
mod tests {
    use clap::CommandFactory;

    use super::*;

    /// Runs clap's own `debug_assert` consistency check over the derived command tree.
    #[test]
    fn cli_definition_is_valid() {
        let command = Cli::command();
        command.debug_assert();
    }

    /// `--config` given after the `query` subcommand is still captured on the top-level `Cli`,
    /// and the remaining words become the query terms.
    #[test]
    fn parses_global_config_after_subcommand() {
        let argv = ["rag-rat", "query", "--config", "x.toml", "foo", "bar"];
        let parsed = Cli::try_parse_from(argv).expect("parse");
        assert_eq!(parsed.config, "x.toml");

        // Pull the query arguments out first, then check them outside the match.
        let args = match parsed.command {
            Command::Query(args) => args,
            other => panic!("expected query, got {other:?}"),
        };
        assert_eq!(args.query, vec!["foo", "bar"]);
        assert!(!args.explain);
    }

    /// With no `--config` on the command line, the config field is `rag-rat.toml`.
    #[test]
    fn config_defaults_to_rag_rat_toml() {
        let outcome = Cli::try_parse_from(["rag-rat", "gc"]);
        let parsed = outcome.expect("parse");
        assert_eq!(parsed.config, "rag-rat.toml");
    }

    /// `--json` is off unless passed, and is accepted when written after the subcommand.
    #[test]
    fn json_flag_defaults_off_and_is_global() {
        // Flag absent: TOON rendering, i.e. `json == false`.
        let without_flag = Cli::try_parse_from(["rag-rat", "gc"]).expect("parse");
        assert!(!without_flag.json, "--json must default off (TOON is the default render)");

        // Flag present after the subcommand (global position): JSON, i.e. `json == true`.
        let argv = ["rag-rat", "query", "foo", "--json"];
        let with_flag = Cli::try_parse_from(argv).expect("parse");
        assert!(with_flag.json, "--json must be accepted globally, after the subcommand");
    }

    /// `--version` makes parsing return a `DisplayVersion` error instead of a `Cli`.
    #[test]
    fn version_flag_short_circuits() {
        let outcome = Cli::try_parse_from(["rag-rat", "--version"]);
        let kind = outcome.unwrap_err().kind();
        assert_eq!(kind, clap::error::ErrorKind::DisplayVersion);
    }

    /// `--help` makes parsing return a `DisplayHelp` error instead of a `Cli`.
    #[test]
    fn help_flag_short_circuits() {
        let outcome = Cli::try_parse_from(["rag-rat", "--help"]);
        let kind = outcome.unwrap_err().kind();
        assert_eq!(kind, clap::error::ErrorKind::DisplayHelp);
    }

    /// `memory rebind <id> --symbol <name>` reaches the nested `Rebind` variant with both values.
    #[test]
    fn nested_memory_rebind_parses() {
        let argv = ["rag-rat", "memory", "rebind", "mem_1", "--symbol", "foo"];
        let parsed = Cli::try_parse_from(argv).expect("parse");

        let (memory_id, symbol) = match parsed.command {
            Command::Memory(MemoryArgs {
                command: MemoryCommand::Rebind { memory_id, symbol, .. },
            }) => (memory_id, symbol),
            other => panic!("expected memory rebind, got {other:?}"),
        };
        assert_eq!(memory_id, "mem_1");
        assert_eq!(symbol.as_deref(), Some("foo"));
    }

    /// `--symbol-id` and `--symbol-path` each parse on `memory rebind`; passing only the id
    /// leaves the other two symbol selectors unset.
    #[test]
    fn memory_rebind_symbol_id_and_path_parse() {
        // Case 1: numeric symbol id only.
        let id_argv = ["rag-rat", "memory", "rebind", "mem_2", "--symbol-id", "42"];
        let by_id = Cli::try_parse_from(id_argv).expect("parse");
        let (symbol_id, symbol_path, symbol) = match by_id.command {
            Command::Memory(MemoryArgs {
                command: MemoryCommand::Rebind { symbol_id, symbol_path, symbol, .. },
            }) => (symbol_id, symbol_path, symbol),
            other => panic!("expected memory rebind, got {other:?}"),
        };
        assert_eq!(symbol_id, Some(42));
        assert_eq!(symbol_path, None);
        assert_eq!(symbol, None);

        // Case 2: exact qualified name.
        let path_argv = ["rag-rat", "memory", "rebind", "mem_3", "--symbol-path", "src/a.rs::foo"];
        let by_path = Cli::try_parse_from(path_argv).expect("parse");
        let symbol_path = match by_path.command {
            Command::Memory(MemoryArgs {
                command: MemoryCommand::Rebind { symbol_path, .. },
            }) => symbol_path,
            other => panic!("expected memory rebind, got {other:?}"),
        };
        assert_eq!(symbol_path.as_deref(), Some("src/a.rs::foo"));
    }

    /// `hooks install --claude --global` yields the `Install` action with both switches set.
    #[test]
    fn hooks_action_and_flags_parse() {
        let argv = ["rag-rat", "hooks", "install", "--claude", "--global"];
        let parsed = Cli::try_parse_from(argv).expect("parse");

        let args = match parsed.command {
            Command::Hooks(args) => args,
            other => panic!("expected hooks, got {other:?}"),
        };
        assert_eq!(args.action, HookAction::Install);
        assert!(args.claude && args.global);
    }

    /// `clones --min-copies 3` sets only that option; the other optional flags stay `None`.
    #[test]
    fn clones_parses_min_copies() {
        let argv = ["rag-rat", "clones", "--min-copies", "3"];
        let parsed = Cli::try_parse_from(argv).expect("parse");

        let args = match parsed.command {
            Command::Clones(args) => args,
            other => panic!("expected clones, got {other:?}"),
        };
        assert_eq!(args.min_copies, Some(3));
        assert!(args.min_similarity.is_none());
        assert!(args.limit.is_none());
        assert!(args.explain.is_none());
    }

    /// All four `clones` options given together are each parsed into their own field.
    #[test]
    fn clones_parses_all_flags() {
        let argv = [
            "rag-rat",
            "clones",
            "--min-similarity",
            "0.8",
            "--min-copies",
            "3",
            "--limit",
            "10",
            "--explain",
            "deadbeef12345678",
        ];
        let parsed = Cli::try_parse_from(argv).expect("parse");

        let args = match parsed.command {
            Command::Clones(args) => args,
            other => panic!("expected clones, got {other:?}"),
        };
        assert_eq!(args.min_similarity, Some(0.8));
        assert_eq!(args.min_copies, Some(3));
        assert_eq!(args.limit, Some(10));
        assert_eq!(args.explain.as_deref(), Some("deadbeef12345678"));
    }

    /// A bare `path::name` argument to `clones-for` fills `symbol` and leaves `path`/`line` unset.
    #[test]
    fn clones_for_parses_positional_ref() {
        let argv = ["rag-rat", "clones-for", "src/a.rs::load_user"];
        let parsed = Cli::try_parse_from(argv).expect("parse");

        let args = match parsed.command {
            Command::ClonesFor(args) => args,
            other => panic!("expected clones-for, got {other:?}"),
        };
        assert_eq!(args.symbol.as_deref(), Some("src/a.rs::load_user"));
        assert!(args.path.is_none());
        assert!(args.line.is_none());
    }

    /// `clones-for --path <file> --line <n>` fills `path` and `line` and leaves `symbol` unset.
    #[test]
    fn clones_for_parses_path_line() {
        let argv = ["rag-rat", "clones-for", "--path", "src/a.rs", "--line", "1"];
        let parsed = Cli::try_parse_from(argv).expect("parse");

        let args = match parsed.command {
            Command::ClonesFor(args) => args,
            other => panic!("expected clones-for, got {other:?}"),
        };
        assert!(args.symbol.is_none());
        assert_eq!(args.path.as_deref(), Some("src/a.rs"));
        assert_eq!(args.line, Some(1));
    }

    /// A `sym_…` handle passed to `clones-for` is stored verbatim in `symbol`.
    #[test]
    fn clones_for_parses_sym_handle() {
        let argv = ["rag-rat", "clones-for", "sym_deadbeef12345678"];
        let parsed = Cli::try_parse_from(argv).expect("parse");

        let args = match parsed.command {
            Command::ClonesFor(args) => args,
            other => panic!("expected clones-for, got {other:?}"),
        };
        assert_eq!(args.symbol.as_deref(), Some("sym_deadbeef12345678"));
    }

    /// With every `benchmark-embedding` option supplied, each one lands in its field; the
    /// comma-separated `--candidates` value becomes a list and `--output` stays unset.
    #[cfg(feature = "eval")]
    #[test]
    fn benchmark_embedding_parses_candidates_and_backend() {
        let argv = [
            "rag-rat",
            "benchmark-embedding",
            "--cookbook",
            "@rag-rat/cookbook modal",
            "--backend",
            "infinity",
            "--model",
            "sentence-transformers/all-MiniLM-L6-v2",
            "--candidates",
            "1,2,4",
            "--budget-ms",
            "30000",
            "--gpu",
            "A10G",
        ];
        let parsed = Cli::try_parse_from(argv).expect("parse");

        let args = match parsed.command {
            Command::BenchmarkEmbedding(args) => args,
            other => panic!("expected benchmark-embedding, got {other:?}"),
        };
        assert_eq!(args.cookbook, "@rag-rat/cookbook modal");
        assert_eq!(args.backend, rag_rat_core::config::RemoteBackend::Infinity);
        assert_eq!(args.model, "sentence-transformers/all-MiniLM-L6-v2");
        assert_eq!(args.candidates, vec![1, 2, 4]);
        assert_eq!(args.budget_ms, Some(30_000));
        assert_eq!(args.gpu.as_deref(), Some("A10G"));
        assert!(args.output.is_none());
    }

    /// Covers the `benchmark-embedding` defaults and rejections: backend falls back to Ollama,
    /// candidates/budget are empty/unset, an unknown backend fails, and both `--cookbook` and
    /// `--model` are required.
    #[cfg(feature = "eval")]
    #[test]
    fn benchmark_embedding_defaults_backend_ollama_and_omits_candidates() {
        // `--cookbook` + `--model` are the only required flags; backend defaults to ollama and the
        // candidate list is empty (→ the handler uses the default ladder).
        let minimal_argv = [
            "rag-rat",
            "benchmark-embedding",
            "--cookbook",
            "@rag-rat/cookbook modal",
            "--model",
            "all-minilm",
        ];
        let parsed = Cli::try_parse_from(minimal_argv).expect("parse");
        let args = match parsed.command {
            Command::BenchmarkEmbedding(args) => args,
            other => panic!("expected benchmark-embedding, got {other:?}"),
        };
        assert_eq!(args.backend, rag_rat_core::config::RemoteBackend::Ollama);
        assert!(args.candidates.is_empty());
        assert!(args.budget_ms.is_none());

        // An unknown backend is rejected by the value_parser.
        let bogus_backend = Cli::try_parse_from([
            "rag-rat",
            "benchmark-embedding",
            "--cookbook",
            "cb",
            "--model",
            "m",
            "--backend",
            "bogus",
        ]);
        assert!(bogus_backend.is_err(), "an unknown --backend must be rejected");

        // `--cookbook` and `--model` are both required.
        assert!(Cli::try_parse_from(["rag-rat", "benchmark-embedding", "--model", "m"]).is_err());
        assert!(
            Cli::try_parse_from(["rag-rat", "benchmark-embedding", "--cookbook", "cb"]).is_err()
        );
    }

    /// A bare `oracle run` selects the rust-analyzer tool and has no `--scip` path.
    #[test]
    fn oracle_run_defaults_to_rust_analyzer() {
        let parsed = Cli::try_parse_from(["rag-rat", "oracle", "run"]).expect("parse");

        let args = match parsed.command {
            Command::Oracle(OracleArgs { command: OracleCommand::Run(args) }) => args,
            other => panic!("expected oracle run, got {other:?}"),
        };
        assert_eq!(args.tool, OracleToolArg::RustAnalyzer);
        assert!(args.scip.is_none());
    }

    /// `oracle run --scip <file>` stores the given path in `scip`.
    #[test]
    fn oracle_run_accepts_scip_path() {
        let argv = ["rag-rat", "oracle", "run", "--scip", "/tmp/x.scip"];
        let parsed = Cli::try_parse_from(argv).expect("parse");

        let args = match parsed.command {
            Command::Oracle(OracleArgs { command: OracleCommand::Run(args) }) => args,
            other => panic!("expected oracle run, got {other:?}"),
        };
        let expected = std::path::Path::new("/tmp/x.scip");
        assert_eq!(args.scip.as_deref(), Some(expected));
    }

    /// `oracle status` parses into the `Status` variant of the oracle subcommand.
    #[test]
    fn oracle_status_parses() {
        let argv = ["rag-rat", "oracle", "status"];
        let outcome = Cli::try_parse_from(argv);
        let cli = outcome.expect("parse");
        assert!(matches!(
            cli.command,
            Command::Oracle(OracleArgs { command: OracleCommand::Status(_) })
        ));
    }

    /// `oracle report` fails without `--corpus`; with it, `--corpora` and `--scip` are parsed
    /// as paths alongside the corpus name.
    #[test]
    fn oracle_report_requires_corpus_and_takes_optional_paths() {
        // `--corpus` is mandatory; a bare `oracle report` must not parse.
        assert!(Cli::try_parse_from(["rag-rat", "oracle", "report"]).is_err());

        let argv = [
            "rag-rat",
            "oracle",
            "report",
            "--corpus",
            "py-requests",
            "--corpora",
            "/tmp/corpora.toml",
            "--scip",
            "/tmp/x.scip",
        ];
        let parsed = Cli::try_parse_from(argv).expect("parse");

        let args = match parsed.command {
            Command::Oracle(OracleArgs { command: OracleCommand::Report(args) }) => args,
            other => panic!("expected oracle report, got {other:?}"),
        };
        assert_eq!(args.corpus, "py-requests");

        let expected_corpora = std::path::Path::new("/tmp/corpora.toml");
        assert_eq!(args.corpora.as_deref(), Some(expected_corpora));

        let expected_scip = std::path::Path::new("/tmp/x.scip");
        assert_eq!(args.scip.as_deref(), Some(expected_scip));
    }
}