skill-inject 0.9.0

skill-inject: local semantic auto-injection of agent skills
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
//! Runtime configuration. Compiled defaults ([`Config::base`]), overlaid by an
//! optional user file (`~/.config/ski/config.toml`, see [`FileConfig`]) loaded
//! through [`Config::load`]. The file is the escape hatch: silence a noisy skill
//! with `deny`, pin `rerank_min`, widen `max_skills`, etc. without a rebuild.

use crate::embed::Embedder;
use crate::hook::Host;
use serde::Deserialize;
use std::path::{Path, PathBuf};

/// Delivery mechanism for a skill that matched the prompt.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum InjectMode {
    /// Point the model at the relevant skill and leave loading the file to it.
    /// Preserves model agency; this is the v1 default.
    Directive,
    /// Place the contents of `SKILL.md` directly into the context.
    Body,
}

/// How insistently a `directive`-mode injection is worded.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Strength {
    /// Pick per host: Claude resolves to soft, opencode to hard.
    Auto,
    /// A gentle nudge, sufficient when the host's native chooser is strong.
    Soft,
    /// An imperative instruction, meant for weak local choosers.
    Hard,
}

/// Resolved runtime configuration: the compiled defaults from `Config::base`,
/// scoped to a host by [`Config::for_host`] and optionally overlaid with the
/// user's [`FileConfig`] through [`Config::load`].
///
/// The thresholds live on three unrelated scales — stage-1 (cosine-space)
/// scores, cross-encoder logits, and BM25 scores — so values are not
/// comparable across the sections below.
#[derive(Debug, Clone)]
pub struct Config {
    /// Embedding model id. Recognized by the fastembed backend; otherwise the
    /// offline bag-of-words backend is used regardless of this value.
    pub model: String,
    /// Minimum hybrid score for a skill to be eligible for injection.
    pub min_similarity: f32,
    /// Max gap below the single best-scoring skill a skill may fall and still be
    /// injected. Suppresses the weak tail: when the top match is strong, only
    /// near-peers ride along; when only weak matches exist (or the leader was
    /// already injected this session), nothing clears the gate. Tuned alongside
    /// `min_similarity` per embedder.
    pub score_margin: f32,
    /// Max skills injected per prompt.
    pub max_skills: usize,
    /// Max total injected characters (budget; enforced in the hook path).
    pub char_budget: usize,
    /// Added to a skill's score per matching keyword.
    pub keyword_boost: f32,
    /// Added to a skill's score per matched trigger phrase (see
    /// [`crate::rank::phrase_score`]). Higher than `keyword_boost`: a full
    /// multi-token phrase match is stronger, higher-precision evidence than a
    /// single keyword token.
    pub phrase_boost: f32,
    /// Filesystem roots scanned for `SKILL.md` files.
    pub roots: Vec<PathBuf>,
    /// How matched skills are injected.
    pub inject_mode: InjectMode,
    /// Forcefulness of directive-mode injections.
    pub directive_strength: Strength,
    /// Skill ids never auto-injected.
    pub deny: Vec<String>,
    /// Skill ids injected whenever a keyword hits, even below `min_similarity`.
    pub force: Vec<String>,

    // --- Query-side context enrichment (see `crate::rank::rank_all_ctx`). A vague
    // follow-up prompt is disambiguated by signals from the turns before it. Two
    // channels share the recent-prompt window (`context_depth`):
    //   * the file-type channel (`file_boost`) — a document file named in the prompt
    //     or a recent turn (`.xlsx`, `.pdf`, ...) boosts its skill. High-precision
    //     and **on by default**: it adds no false-inject on any eval corpus and
    //     doubles recall on multi-turn document follow-ups.
    //   * the dense blend (`context_weight`) — a recency-weighted context vector
    //     mixed into stage-1. **Off by default**: it lifts multi-turn recall but
    //     admits a topic-switch false-inject no scalar floor separates from genuine
    //     vague follow-ups. Set `context_weight > 0` to opt in. ---
    /// How many recent prompts to retain as conversational context (0 = disabled).
    pub context_depth: usize,
    /// Max weight the context channel can add to a skill's score. The *effective*
    /// weight scales from this (a fully vague prompt) down to 0 (a confident,
    /// specific prompt) — see [`crate::rank::context_weight`]. Cosine-space, tuned
    /// per embedder; 0.0 disables the blend.
    pub context_weight: f32,
    /// Prompt best-self-cosine at/below which a prompt counts as *fully* vague
    /// (context applied at full `context_weight`).
    pub vague_lo: f32,
    /// Prompt best-self-cosine at/above which a prompt counts as confident
    /// (context suppressed entirely). Between `vague_lo` and this, context scales
    /// linearly.
    pub vague_hi: f32,
    /// Score added to a skill when a file of its type is referenced in the prompt
    /// or recent context (e.g. a `.xlsx` boosts `xlsx`; see
    /// [`crate::context::file_ids`]). High-precision and *not* vagueness-gated — a
    /// named file is unambiguous. 0.0 disables the channel.
    pub file_boost: f32,
    /// Score added to a skill whose ecosystem matches the working directory's
    /// project manifests or a code file referenced in the conversation (a
    /// `uv.lock` implies the *uv*/*python* terms, a named `etl.py` implies
    /// *python*, ...; see [`crate::context::project_terms`] /
    /// [`crate::context::code_terms`]). Terms resolve dynamically against the
    /// installed library ([`crate::context::skills_for_terms`]), so the channel
    /// surfaces whatever uv/rust/go skill the user actually has, by any name.
    /// Unlike a named file, this is an *ambient* signal present on every turn, so
    /// it is the weakest channel and is gated on the skill's own cosine sitting
    /// within [`crate::rank::PROJECT_GATE_SLACK`] of `min_similarity` in
    /// [`crate::rank::rank_all_ctx`] — it lifts a near-plausible ecosystem skill
    /// over the floor (deliberately recall-leaning: the model ignores a surfaced
    /// skill it doesn't need, and per-session dedup caps the cost at one showing)
    /// but never rescues a clearly-irrelevant one. 0.0 disables the channel.
    pub project_boost: f32,

    // --- Stage-2 reranking (see `crate::rerank`). `recall_floor`, `high_conf` and
    // `clear_gap` gate on *stage-1* scores (they decide whether the reranker runs
    // at all); `rerank_min` and `rerank_margin` are on the cross-encoder's logit
    // scale, unrelated to the cosine thresholds above. None of them are touched
    // by `calibrate_to`, which only sets `min_similarity` and `score_margin`. ---
    /// Stage-1 score below which a prompt is treated as having no relevant skill,
    /// so the (costly) reranker is skipped entirely.
    pub recall_floor: f32,
    /// Stage-1 score above which the top match may be a confident lone winner.
    pub high_conf: f32,
    /// Minimum stage-1 gap from the top match to the runner-up for the top to
    /// count as a *lone* winner (and thus skip reranking).
    pub clear_gap: f32,
    /// How many stage-1 candidates are handed to the reranker.
    pub rerank_top_k: usize,
    /// Minimum reranker logit for a skill to be injected.
    pub rerank_min: f32,
    /// Max reranker-logit gap below the best reranked skill for a peer to ride along.
    pub rerank_margin: f32,

    /// Confidence (`[0,1]`) at/above which a *lone* near-certain match is escalated
    /// from a directive pointer to a full body inject — the `SKILL.md` is inlined
    /// directly so the model can't skip the Skill-tool round-trip. Only fires in
    /// `inject_mode = directive` and only when exactly one skill is selected (two
    /// co-relevant peers mean we are *less* certain, so they stay directives). Set
    /// deliberately high: in practice this is reached only by a cross-encoder-
    /// confirmed match (the cosine→confidence map caps below it for bge), so a
    /// fluky stage-1 hit never triggers a body dump. Raise above `1.0` to disable.
    pub body_inject_min: f32,

    // --- Stage-1.5 lexical channel (see `crate::lexical`). BM25 over the full skill
    // description, a high-precision fast-path that injects a *dominant* lexical
    // winner directly, skipping the reranker — it rescues indirect prompts whose
    // discriminating vocabulary lives in the description prose but whose bi-encoder
    // cosine is muddy and whose reranker logit falls below the abstention floor.
    // Only fires when stage-1 has no confident lone dense winner. These thresholds
    // are on BM25's own scale, unrelated to the cosine/logit thresholds above and
    // untouched by `calibrate_to`. ---
    /// Minimum absolute BM25 score for the top description match to be a lexical
    /// winner. `<= 0` disables the channel entirely.
    pub lexical_min: f32,
    /// Minimum BM25 gap from the top description match to the runner-up for the top
    /// to count as *dominant* (and thus inject directly, skipping the reranker). The
    /// margin is what keeps the fast-path high-precision: a cluster of near-equal
    /// descriptions abstains and defers to the reranker.
    pub lexical_margin: f32,

    /// Append opt-in JSONL telemetry events (see [`crate::telemetry`]). Off by
    /// default. Enabled by this field *or* a truthy `SKI_TELEMETRY` env var —
    /// either one turns it on, so the env var still works without a config file.
    pub telemetry: bool,
}

impl Config {
    /// Adopt the active embedder's score thresholds. Cosine distributions are a
    /// property of the embedding space, not user preference, so `min_similarity`
    /// and `score_margin` follow the embedder that actually ran (bge vs the
    /// offline bag-of-words fallback). Other fields are left untouched.
    ///
    /// This overwrites any user-pinned value for those two fields; callers that
    /// want a pin to survive re-apply it afterwards with
    /// [`FileConfig::apply_cosine`].
    pub fn calibrate_to(&mut self, embedder: &dyn Embedder) {
        self.min_similarity = embedder.min_similarity();
        self.score_margin = embedder.score_margin();
    }

    /// Config scoped to `host`: discovery `roots` (and, via
    /// [`crate::paths::index_path`], the on-disk index) cover only that host's
    /// skill library. Keeps an injected skill name resolvable in the host that
    /// receives it — a Claude-only id never injects into opencode and vice versa.
    pub fn for_host(host: Host) -> Self {
        Self {
            roots: host_roots(host),
            ..Self::base()
        }
    }

    /// Host-scoped config with the user file ([`FileConfig`]) overlaid, returned
    /// alongside the parsed file. The file is returned so a caller that calibrates
    /// can re-assert the cosine pins afterward: [`Config::calibrate_to`] overwrites
    /// `min_similarity`/`score_margin` from the embedder and would otherwise clobber
    /// a user-set value. Callers that never calibrate can ignore the [`FileConfig`].
    pub fn load(host: Host) -> (Self, FileConfig) {
        let file = FileConfig::load();
        let mut cfg = Self::for_host(host);
        file.apply(&mut cfg);
        (cfg, file)
    }

    /// Compiled defaults for every field except `roots`, which is left empty
    /// here and filled per host by [`Config::for_host`].
    fn base() -> Self {
        Self {
            model: "bge-small-en-v1.5".into(),
            min_similarity: 0.30,
            score_margin: 0.15,
            max_skills: 2,
            char_budget: 6000,
            keyword_boost: 0.15,
            phrase_boost: 0.20,
            roots: Vec::new(), // overwritten by `for_host`.
            inject_mode: InjectMode::Directive,
            directive_strength: Strength::Auto,
            deny: Vec::new(),
            force: Vec::new(),
            // The file-type channel is on by default (high-precision, zero added
            // false-inject across every eval corpus). The dense blend stays off
            // (`context_weight 0.0`): it lifts multi-turn recall but admits a
            // topic-switch false-inject no scalar floor separates. `context_depth`
            // keeps the rolling prompt window the file channel needs to see a file
            // attached a turn or two back; `vague_lo`/`hi` are bge-cosine-space
            // gates, live only for the dense blend once it is opted into.
            context_depth: 3,
            context_weight: 0.0,
            vague_lo: 0.55,
            vague_hi: 0.65,
            file_boost: 0.3,
            // Ambient project signal **on by default** now that terms resolve
            // dynamically against the installed library (the old hardcoded-id map
            // was inert outside one specific library). Sized to matter but not
            // dominate: enough to lift a near-floor ecosystem skill over the line
            // (with rank::PROJECT_GATE_SLACK) or into the leader's margin, well
            // below the file channel's 0.3 (a named document is far stronger
            // evidence than ambient workspace type).
            project_boost: 0.15,
            // Reranker gate + thresholds, calibrated against the JINA turbo
            // reranker (see `examples/rerank_probe`). Top-1 accuracy: 76% with
            // stage-1 only -> 88% with reranking.
            //
            // `recall_floor` skips the reranker when nothing is plausibly relevant.
            // bge is anisotropic (unrelated prompts still cosine ~0.5), which
            // compresses the usable range: 0.50 skips clearly-irrelevant prompts
            // without dropping real-but-weak matches. `high_conf` is effectively
            // disabled (2.0): a confidence-based skip measurably *hurt* accuracy,
            // because the bi-encoder is confidently wrong on the confusable pairs
            // the reranker exists to fix. It is retained as a tunable, not removed.
            //
            // `rerank_min` is tuned on the realistic ~120-skill index (17 anthropic
            // + highest-installed community skills from skills.sh; see
            // `tests/data/popular_skills_prompts.tsv`) — the artificially narrow
            // 17-skill anthropic library is *not* the tuning authority (its indirect
            // prompts have no good match and score like noise, which would pull the
            // floor too low). The earlier -2.5/-1.5 floors admitted a band of
            // negative-logit candidates — sigmoid(-1.5) ~= 0.18 confidence — that the
            // cross-encoder is itself signalling are *not* matches. Live telemetry
            // confirmed it: of the recommendations injected in the 0.18-0.24
            // confidence band ("commit and push" -> caveman-commit, "continue" ->
            // pickup, "/goal ..." -> skill-creator), essentially none were ever acted
            // on. Injecting them is the dominant usage-rate drag and erodes the whole
            // channel's credibility (the model learns to ignore *every*
            // SkillRecommendation, including the strong ones), so the floor abstains
            // on them. -1.1 (sigmoid ~= 0.25) is the precise recall-preserving
            // precision maximum on the realistic corpus: a strict improvement over
            // -1.5 there (recall held at 41/43 = 95%, false injects 3 -> 1 of 64);
            // tightening past it (-1.0) starts dropping real positives. The cost is
            // some recall on *indirect* prompts whose logit sits in -1.5..-1.1 — a
            // zone where genuine weak matches and live noise overlap and no scalar
            // separates them (cross-encoder pairs score independently of the index) —
            // but those are largely cases the host's own skill chooser covers anyway.
            // Larger embedders/rerankers (bge-base, jina-v2) tie this at higher cost,
            // so the gate is the lever. Sweep via `SKI_RERANK_MIN` in `examples/eval`.
            //
            // `rerank_min` alone can't catch every false inject: on a richer corpus
            // a no-match prompt's reranked logit interleaves with genuine weak
            // matches, so no scalar separates them. The complementary lever is the
            // stage-1 *agreement* gate in `rerank::passes` — a reranked skill's
            // bi-encoder score must sit within a small slack of `min_similarity`,
            // i.e. the reranker may reorder the retrieved-relevant set but not
            // resurrect a skill stage-1 judged irrelevant. That cut false injects a
            // further ~67% (3 -> 1 on the 52-negative realistic corpus) at no extra
            // compute, holding recall at 95%. See `rerank::AGREEMENT_SLACK`, `examples/eval`.
            //
            // 2026-06-23 REVERSAL — floor restored to -2.5. The precision tuning above
            // optimised the wrong objective. It rested on live telemetry showing the
            // 0.18-0.24 band was "never acted on" — but those injects were *also*
            // phrased timidly ("consider invoking it", the Low confidence-band verb in
            // `inject::directive_block`), so the signal conflated *wrong skill* with
            // *right skill, timid verb*. A controlled probe against the real host
            // (`claude -p`, see `[[ski-host-recall-gap]]`) separated them: the strong
            // host hand-rolls instead of invoking on indirect-task prompts (5/12 miss,
            // every miss a Bash hand-roll), and the SAME right skill is ignored 0/3
            // when phrased "consider…" but invoked 2/3..3/3 when phrased "invoke it
            // now, before you respond." The two over-tunes (high floor + timid verb)
            // interacted: lowering the floor admits the indirect matches, the firm verb
            // (now applied to every inject, see `inject.rs`) makes the host act on them.
            // FP cost stays low because a strong host *ignores* false injects even when
            // phrased firmly (3/3 in the probe) — so on a strong host ski's job is
            // RECALL of host-misses, where false positives are nearly free, not the
            // precision the old eval rewarded. -2.5 recovers docx/brand/xlsx misses;
            // pdf/pptx remain a small-model retrieval ceiling no floor can fix.
            recall_floor: 0.50,
            high_conf: 2.0,
            clear_gap: 0.12,
            rerank_top_k: 12,
            rerank_min: -2.5,
            rerank_margin: 2.0,
            // Body-escalate only a lone, cross-encoder-confirmed near-certain match
            // (sigmoid(2.45) ~= 0.92). High enough that a stage-1 cosine hit — whose
            // confidence maps to <= ~0.85 for bge — never triggers it, so only the
            // reranker's strongest verdicts inline the full SKILL.md.
            body_inject_min: 0.92,
            // Lexical fast-path: **off by default** (like the dense blend; note that
            // `project_boost` is *on* by default, see above) — the eval gives it no
            // free win on the realistic tuning corpus.
            // Measured across `examples/eval` (sweep `SKI_LEXICAL_MIN` /
            // `SKI_LEXICAL_MARGIN`):
            //   * It clearly helps *indirect task* prompts whose operation is spelled
            //     out in the description but whose cosine is muddy and reranker logit
            //     sub-floor — the document/design corpus jumps +4..+8 recall ("compute
            //     the totals and make a chart in my spreadsheet" -> xlsx; "OCR this
            //     scanned pdf" -> pdf; "looping reaction gif for slack" ->
            //     slack-gif-creator). These are dense+rerank abstentions.
            //   * It *hurts* on knowledge/explainer negatives that merely name a
            //     technology ("what year was rust released" -> rust-async-patterns;
            //     "difference between a docker container and a VM" -> docker-expert): a
            //     rare term dominantly hits one description and false-injects. The
            //     2-term overlap guard (`lexical::MIN_TERM_OVERLAP`) does not catch them.
            // No single threshold is a free win on the realistic corpus: the setting
            // that captures the big indirect-recall gain (min ~4, margin ~2) adds ~3
            // false injects there, while the do-no-harm setting (min ~6, margin ~5) is
            // near-neutral. Recommended opt-in for indirect-heavy libraries:
            // `lexical_min = 4`, `lexical_margin = 2`.
            lexical_min: 0.0,
            lexical_margin: 0.0,
            telemetry: false,
        }
    }
}

impl Default for Config {
    /// The Claude-scoped config. `ski index`/`why` (and the eval harness) default
    /// here; the hot paths build [`Config::for_host`] from their `--host` flag.
    fn default() -> Self {
        Self::for_host(Host::Claude)
    }
}

/// User overrides parsed from `~/.config/ski/config.toml`. Every field is
/// optional; an absent field — or an absent/malformed file — leaves the compiled
/// default untouched. Parsing fails open: a malformed file yields an empty
/// overlay (all defaults) rather than an error, so a bad config can never block
/// injection. Unknown keys are ignored (a typo drops one field, not the file).
///
/// Each field mirrors the [`Config`] field of the same name (see there for its
/// meaning); `None` means "keep the compiled default". Keep
/// [`FileConfig::KEYS`] in sync when adding a field.
#[derive(Debug, Clone, Default, Deserialize)]
#[serde(default)]
pub struct FileConfig {
    pub model: Option<String>,
    /// Cosine pin. Overwritten by [`Config::calibrate_to`], so it is re-applied
    /// after calibration through [`FileConfig::apply_cosine`].
    pub min_similarity: Option<f32>,
    /// Cosine pin; same calibration caveat as `min_similarity`.
    pub score_margin: Option<f32>,
    pub max_skills: Option<usize>,
    pub char_budget: Option<usize>,
    pub keyword_boost: Option<f32>,
    pub phrase_boost: Option<f32>,
    /// Replaces the host's discovery roots; ignored while the `SKI_ROOTS` env
    /// override is active.
    pub roots: Option<Vec<PathBuf>>,
    /// `"directive"` or `"body"`, matched case-insensitively after trimming;
    /// any other string is ignored and the default kept.
    pub inject_mode: Option<String>,
    /// `"auto"`, `"soft"` or `"hard"`, matched case-insensitively after
    /// trimming; any other string is ignored and the default kept.
    pub directive_strength: Option<String>,
    /// Replaces (does not extend) the deny list.
    pub deny: Option<Vec<String>>,
    /// Replaces (does not extend) the force list.
    pub force: Option<Vec<String>>,
    pub context_depth: Option<usize>,
    pub context_weight: Option<f32>,
    pub vague_lo: Option<f32>,
    pub vague_hi: Option<f32>,
    pub file_boost: Option<f32>,
    pub project_boost: Option<f32>,
    pub recall_floor: Option<f32>,
    pub high_conf: Option<f32>,
    pub clear_gap: Option<f32>,
    pub rerank_top_k: Option<usize>,
    pub rerank_min: Option<f32>,
    pub rerank_margin: Option<f32>,
    pub body_inject_min: Option<f32>,
    pub lexical_min: Option<f32>,
    pub lexical_margin: Option<f32>,
    pub telemetry: Option<bool>,
}

impl FileConfig {
    /// Every key the overlay understands, in struct order. Used by `ski doctor`
    /// to flag typos: unknown keys are silently ignored at runtime (by design),
    /// so a misspelled `deny` never applies and nothing says why. Adding a field
    /// to the struct? Add its key here (the `keys_cover_the_struct` test below
    /// catches a miss).
    pub const KEYS: &'static [&'static str] = &[
        "model",
        "min_similarity",
        "score_margin",
        "max_skills",
        "char_budget",
        "keyword_boost",
        "phrase_boost",
        "roots",
        "inject_mode",
        "directive_strength",
        "deny",
        "force",
        "context_depth",
        "context_weight",
        "vague_lo",
        "vague_hi",
        "file_boost",
        "project_boost",
        "recall_floor",
        "high_conf",
        "clear_gap",
        "rerank_top_k",
        "rerank_min",
        "rerank_margin",
        "body_inject_min",
        "lexical_min",
        "lexical_margin",
        "telemetry",
    ];

    /// Parse the user config, or an empty (all-default) overlay when the file is
    /// missing. Still fail-open on a malformed file (never blocks injection),
    /// but — unlike a missing file, which is a normal, silent no-op — a
    /// *present but unparseable* file means the user's overrides (`deny`, a
    /// tuned `rerank_min`, ...) are being silently ignored, which is worth a
    /// one-line stderr warning (with the parse location) so they can find out
    /// without already knowing to set `SKI_DEBUG`.
    pub fn load() -> Self {
        let path = crate::paths::config_path();
        let Ok(raw) = std::fs::read_to_string(&path) else {
            return Self::default(); // no file: not an error, nothing to say.
        };
        Self::parse(&raw).unwrap_or_else(|e| {
            // Only the first non-blank line of the parser's message is shown, to
            // keep the warning to a single line.
            let msg = e.to_string();
            let first = msg.lines().find(|l| !l.trim().is_empty()).unwrap_or("");
            eprintln!(
                "ski: {} is not valid TOML ({first}); ignoring it and using defaults",
                path.display()
            );
            Self::default()
        })
    }

    /// Pure TOML parse, shared by [`load`](Self::load), `ski doctor` (which
    /// reports the error loudly instead of failing open), and tests. `Err` on
    /// malformed input.
    pub(crate) fn parse(raw: &str) -> Result<Self, toml::de::Error> {
        toml::from_str(raw)
    }

    /// Whether the `SKI_ROOTS` env override is actually in effect: the variable
    /// is set *and* names at least one non-empty path. This mirrors the test
    /// `host_roots` applies before honouring the variable, so the two can never
    /// disagree about who owns `roots`.
    fn env_roots_active() -> bool {
        std::env::var_os("SKI_ROOTS")
            .is_some_and(|raw| std::env::split_paths(&raw).any(|p| !p.as_os_str().is_empty()))
    }

    /// Overlay every present field onto `cfg`. `roots` is ignored while the
    /// `SKI_ROOTS` env override is active (env wins, for evals/tooling); an
    /// empty `SKI_ROOTS` is not an override, so the file's `roots` still apply.
    /// Unknown `inject_mode`/`directive_strength` strings are ignored, keeping
    /// the default. List fields (`roots`, `deny`, `force`) replace the existing
    /// value rather than extending it.
    pub fn apply(&self, cfg: &mut Config) {
        if let Some(v) = &self.model {
            cfg.model.clone_from(v);
        }
        self.apply_cosine(cfg);
        if let Some(v) = self.max_skills {
            cfg.max_skills = v;
        }
        if let Some(v) = self.char_budget {
            cfg.char_budget = v;
        }
        if let Some(v) = self.keyword_boost {
            cfg.keyword_boost = v;
        }
        if let Some(v) = self.phrase_boost {
            cfg.phrase_boost = v;
        }
        if let Some(v) = &self.roots {
            // `host_roots` falls back to the host defaults when `SKI_ROOTS` is
            // set but empty; checking mere presence here would then drop the
            // user's file roots even though no env override took effect.
            if !Self::env_roots_active() {
                cfg.roots.clone_from(v);
            }
        }
        if let Some(m) = self.inject_mode.as_deref().and_then(parse_inject_mode) {
            cfg.inject_mode = m;
        }
        if let Some(s) = self.directive_strength.as_deref().and_then(parse_strength) {
            cfg.directive_strength = s;
        }
        if let Some(v) = &self.deny {
            cfg.deny.clone_from(v);
        }
        if let Some(v) = &self.force {
            cfg.force.clone_from(v);
        }
        if let Some(v) = self.context_depth {
            cfg.context_depth = v;
        }
        if let Some(v) = self.context_weight {
            cfg.context_weight = v;
        }
        if let Some(v) = self.vague_lo {
            cfg.vague_lo = v;
        }
        if let Some(v) = self.vague_hi {
            cfg.vague_hi = v;
        }
        if let Some(v) = self.file_boost {
            cfg.file_boost = v;
        }
        if let Some(v) = self.project_boost {
            cfg.project_boost = v;
        }
        if let Some(v) = self.recall_floor {
            cfg.recall_floor = v;
        }
        if let Some(v) = self.high_conf {
            cfg.high_conf = v;
        }
        if let Some(v) = self.clear_gap {
            cfg.clear_gap = v;
        }
        if let Some(v) = self.rerank_top_k {
            cfg.rerank_top_k = v;
        }
        if let Some(v) = self.rerank_min {
            cfg.rerank_min = v;
        }
        if let Some(v) = self.rerank_margin {
            cfg.rerank_margin = v;
        }
        if let Some(v) = self.body_inject_min {
            cfg.body_inject_min = v;
        }
        if let Some(v) = self.lexical_min {
            cfg.lexical_min = v;
        }
        if let Some(v) = self.lexical_margin {
            cfg.lexical_margin = v;
        }
        if let Some(v) = self.telemetry {
            cfg.telemetry = v;
        }
    }

    /// Re-assert just the cosine thresholds. [`Config::calibrate_to`] overwrites
    /// `min_similarity`/`score_margin` from the embedder, so a user pin must be
    /// applied *after* calibration to survive.
    pub fn apply_cosine(&self, cfg: &mut Config) {
        if let Some(v) = self.min_similarity {
            cfg.min_similarity = v;
        }
        if let Some(v) = self.score_margin {
            cfg.score_margin = v;
        }
    }
}

/// Map a user-supplied mode name to an [`InjectMode`]. Surrounding whitespace
/// and ASCII case are ignored; anything other than `directive`/`body` is `None`.
fn parse_inject_mode(s: &str) -> Option<InjectMode> {
    let name = s.trim();
    if name.eq_ignore_ascii_case("directive") {
        Some(InjectMode::Directive)
    } else if name.eq_ignore_ascii_case("body") {
        Some(InjectMode::Body)
    } else {
        None
    }
}

/// Map a user-supplied strength name to a [`Strength`]. Surrounding whitespace
/// and ASCII case are ignored; anything other than `auto`/`soft`/`hard` is `None`.
fn parse_strength(s: &str) -> Option<Strength> {
    let name = s.trim();
    let known = [
        ("auto", Strength::Auto),
        ("soft", Strength::Soft),
        ("hard", Strength::Hard),
    ];
    known
        .iter()
        .find(|entry| name.eq_ignore_ascii_case(entry.0))
        .map(|entry| entry.1)
}

/// Skill discovery roots for `host`.
///
/// A `SKI_ROOTS` env var naming at least one non-empty path overrides every
/// host, so evals/tools can scope discovery to a single skill library without a
/// config file (e.g. `SKI_ROOTS=~/.claude/plugins/marketplaces/anthropic-agent-skills`).
/// The value is split with [`std::env::split_paths`], i.e. on the platform's
/// `PATH` separator (`:` on Unix). An unset or effectively empty `SKI_ROOTS`
/// falls through to the host defaults: for Claude, `~/.claude/skills` and
/// `~/.claude/plugins` (when `HOME` is set) followed by the project-local
/// `.claude/skills`; for opencode, whatever [`opencode_roots`] reports.
fn host_roots(host: Host) -> Vec<PathBuf> {
    let from_env: Vec<PathBuf> = std::env::var_os("SKI_ROOTS")
        .map(|raw| {
            std::env::split_paths(&raw)
                .filter(|candidate| !candidate.as_os_str().is_empty())
                .collect()
        })
        .unwrap_or_default();
    if !from_env.is_empty() {
        return from_env;
    }

    match host {
        Host::Opencode => opencode_roots(),
        Host::Claude => {
            let home = std::env::var_os("HOME").map(PathBuf::from);
            home.iter()
                .flat_map(|dir| [dir.join(".claude/skills"), dir.join(".claude/plugins")])
                .chain(std::iter::once(PathBuf::from(".claude/skills")))
                .collect()
        }
    }
}

/// Skill roots for opencode, read from its global config.
///
/// opencode lists its skill directories under `skills.paths` in
/// `opencode.json` instead of using a fixed directory, so the roots come from
/// that file rather than a guess. Absolute entries are taken verbatim; relative
/// ones are resolved against the process cwd, which the hook subprocess
/// inherits from opencode's project dir. Project-local `opencode.json`
/// overrides are a later milestone (the hook does not yet consume the event's
/// `cwd`). Returns no roots when the config path is unknown or the file cannot
/// be read.
fn opencode_roots() -> Vec<PathBuf> {
    let contents = opencode_config_path().and_then(|path| std::fs::read_to_string(path).ok());
    match contents {
        Some(raw) => {
            let cwd = std::env::current_dir().ok();
            parse_opencode_paths(&raw, cwd.as_deref())
        }
        None => Vec::new(),
    }
}

/// Location of opencode's global config (`$XDG_CONFIG_HOME/opencode/opencode.json`,
/// default `~/.config/opencode/opencode.json`).
///
/// Per the XDG Base Directory spec, an `XDG_CONFIG_HOME` that is unset *or
/// empty* falls back to `$HOME/.config`. Returns `None` only when neither a
/// usable `XDG_CONFIG_HOME` nor `HOME` is available.
fn opencode_config_path() -> Option<PathBuf> {
    let base = std::env::var_os("XDG_CONFIG_HOME")
        // An empty value would otherwise yield the cwd-relative path
        // `opencode/opencode.json`; treat it the same as "unset".
        .filter(|dir| !dir.is_empty())
        .map(PathBuf::from)
        .or_else(|| std::env::var_os("HOME").map(|h| PathBuf::from(h).join(".config")))?;
    Some(base.join("opencode").join("opencode.json"))
}

/// Extracts the `skills.paths` entries from an opencode config blob.
///
/// Only non-empty string entries are kept, in file order. A relative entry is
/// joined onto `cwd` when one is supplied; absolute entries (and every entry
/// when `cwd` is `None`) pass through unchanged. Malformed JSON, a missing key,
/// or a non-array `paths` value all yield no roots (fail open: no injection
/// rather than a wrong-host one). Pure core of [`opencode_roots`].
fn parse_opencode_paths(raw: &str, cwd: Option<&Path>) -> Vec<PathBuf> {
    let document: serde_json::Value = match serde_json::from_str(raw) {
        Ok(value) => value,
        Err(_) => return Vec::new(),
    };
    let entries = match document.get("skills").and_then(|skills| skills.get("paths")) {
        Some(serde_json::Value::Array(items)) => items,
        _ => return Vec::new(),
    };

    let mut roots = Vec::new();
    for entry in entries {
        // Skip non-string and empty entries rather than failing the whole list.
        let text = match entry.as_str() {
            Some(text) if !text.is_empty() => text,
            _ => continue,
        };
        let candidate = PathBuf::from(text);
        let resolved = match cwd {
            Some(base) if candidate.is_relative() => base.join(candidate),
            _ => candidate,
        };
        roots.push(resolved);
    }
    roots
}

// Unit tests for threshold calibration, host root discovery, opencode config
// parsing and the `FileConfig` overlay.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::embed::{bow::BowEmbedder, EmbedKind, Embedder};

    /// Stands in for a dense embedder with its own (non-default) thresholds.
    struct StubEmbedder;
    impl Embedder for StubEmbedder {
        fn id(&self) -> String {
            "stub".into()
        }
        // Never produces vectors; only the threshold accessors matter here.
        fn embed(&self, _: &[String], _: EmbedKind) -> anyhow::Result<Vec<Vec<f32>>> {
            Ok(vec![])
        }
        fn min_similarity(&self) -> f32 {
            0.64
        }
        fn score_margin(&self) -> f32 {
            0.12
        }
    }

    /// `calibrate_to` copies both cosine thresholds from the embedder.
    #[test]
    fn calibrate_adopts_embedder_thresholds() {
        let mut cfg = Config::default();
        cfg.calibrate_to(&StubEmbedder);
        assert_eq!(cfg.min_similarity, 0.64);
        assert_eq!(cfg.score_margin, 0.12);
    }

    /// Claude roots include a `.claude/skills` entry and nothing opencode-related.
    #[test]
    fn claude_roots_are_claude_scoped() {
        // Skip if an outer `SKI_ROOTS` override is active (it shadows both hosts).
        if std::env::var_os("SKI_ROOTS").is_some() {
            return;
        }
        let claude = host_roots(Host::Claude);
        assert!(claude
            .iter()
            .any(|p| p.to_string_lossy().contains(".claude/skills")));
        assert!(!claude
            .iter()
            .any(|p| p.to_string_lossy().contains("opencode")));
    }

    /// Relative `skills.paths` entries are joined onto `cwd`; absolute ones are
    /// kept verbatim. Sibling keys such as `urls` are ignored.
    #[test]
    fn opencode_paths_parsed_and_resolved() {
        let json = r#"{"skills":{"paths":[".opencode/skills","/abs/repo"],"urls":[]}}"#;
        let roots = parse_opencode_paths(json, Some(Path::new("/proj")));
        assert_eq!(
            roots,
            vec![
                PathBuf::from("/proj/.opencode/skills"),
                PathBuf::from("/abs/repo"),
            ]
        );
    }

    /// Missing `skills`, missing `paths`, and invalid JSON all yield no roots.
    #[test]
    fn opencode_paths_tolerate_missing_key_and_bad_json() {
        assert!(parse_opencode_paths("{}", None).is_empty());
        assert!(parse_opencode_paths(r#"{"skills":{}}"#, None).is_empty());
        assert!(parse_opencode_paths("not json", None).is_empty());
    }

    /// Only the keys present in the file are applied; everything else keeps its
    /// default.
    #[test]
    fn file_overlay_applies_present_fields_only() {
        let raw = r#"
            max_skills = 5
            rerank_min = -0.5
            deny = ["noisy-skill"]
            inject_mode = "body"
            directive_strength = "hard"
            telemetry = true
        "#;
        let file = FileConfig::parse(raw).unwrap();
        let mut cfg = Config::default();
        let (orig_model, orig_budget) = (cfg.model.clone(), cfg.char_budget);
        assert!(!cfg.telemetry); // off by default
        file.apply(&mut cfg);
        assert_eq!(cfg.max_skills, 5);
        assert_eq!(cfg.rerank_min, -0.5);
        assert_eq!(cfg.deny, ["noisy-skill"]);
        assert_eq!(cfg.inject_mode, InjectMode::Body);
        assert_eq!(cfg.directive_strength, Strength::Hard);
        // Enabled via config.toml.
        assert!(cfg.telemetry);
        // Untouched fields keep their defaults.
        assert_eq!(cfg.model, orig_model);
        assert_eq!(cfg.char_budget, orig_budget);
    }

    /// Re-running `apply_cosine` after calibration restores a pinned threshold
    /// while the unpinned one keeps the embedder's value.
    #[test]
    fn cosine_pin_survives_calibration() {
        // A user pin must win even though calibrate_to runs after the overlay.
        let file = FileConfig::parse("min_similarity = 0.80").unwrap();
        let mut cfg = Config::default();
        file.apply(&mut cfg);
        cfg.calibrate_to(&StubEmbedder); // would set 0.64
        file.apply_cosine(&mut cfg); // re-assert the pin
        assert_eq!(cfg.min_similarity, 0.80);
        assert_eq!(cfg.score_margin, 0.12); // unpinned -> embedder value
    }

    /// Despite the name, this only checks that `FileConfig::parse` reports
    /// malformed TOML as an `Err`; the fall-back-to-defaults behaviour of
    /// `load` is covered by the next test.
    #[test]
    fn malformed_file_is_empty_overlay() {
        assert!(FileConfig::parse("this is not = = toml").is_err());
    }

    #[test]
    fn load_falls_back_to_defaults_on_malformed_on_disk_file() {
        // FileConfig::load() must still fail open on a *present but unparseable*
        // config.toml (only the stderr warning is new — the caller never sees an
        // error and the result behaves exactly like a missing file).
        // The scratch directory name includes the pid so concurrent test
        // processes do not share it.
        let dir = std::env::temp_dir().join(format!("ski-cfg-load-{}", std::process::id()));
        std::fs::create_dir_all(dir.join("ski")).unwrap();
        // Type error, not a syntax error: `max_skills` is given a string.
        std::fs::write(dir.join("ski/config.toml"), "max_skills = \"nope\"").unwrap();

        // Point XDG_CONFIG_HOME at the scratch dir for the duration of the load,
        // then restore the previous value (or remove it if it was unset).
        // NOTE(review): environment variables are process-wide, so this could
        // race with other tests in this binary that read XDG_CONFIG_HOME while
        // running in parallel — confirm none do.
        let prev = std::env::var_os("XDG_CONFIG_HOME");
        std::env::set_var("XDG_CONFIG_HOME", &dir);
        let file = FileConfig::load();
        match prev {
            Some(v) => std::env::set_var("XDG_CONFIG_HOME", v),
            None => std::env::remove_var("XDG_CONFIG_HOME"),
        }

        let mut cfg = Config::default();
        let default_max_skills = cfg.max_skills;
        file.apply(&mut cfg);
        assert_eq!(cfg.max_skills, default_max_skills); // the bad field never applied
        // Best-effort cleanup; a leftover temp dir must not fail the test.
        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn keys_cover_the_struct() {
        // `FileConfig::KEYS` is hand-maintained for `ski doctor`'s typo check;
        // this pins it to the field list serde derives from the struct itself,
        // so adding a field without updating KEYS fails here instead of silently
        // weakening the doctor check. The probe deserializer only intercepts
        // `deserialize_struct` to grab serde's FIELDS array, then bails.
        // `RefCell` is needed because `Deserializer` is implemented for `&Grab`,
        // which only has shared access to the collected list.
        struct Grab(std::cell::RefCell<Vec<&'static str>>);
        impl<'de> serde::Deserializer<'de> for &Grab {
            type Error = serde::de::value::Error;
            // Every non-struct request is forwarded here and rejected.
            fn deserialize_any<V: serde::de::Visitor<'de>>(
                self,
                _: V,
            ) -> Result<V::Value, Self::Error> {
                Err(serde::de::Error::custom("probe"))
            }
            // Record the field names, then abort without visiting anything.
            fn deserialize_struct<V: serde::de::Visitor<'de>>(
                self,
                _: &'static str,
                fields: &'static [&'static str],
                _: V,
            ) -> Result<V::Value, Self::Error> {
                self.0.borrow_mut().extend_from_slice(fields);
                Err(serde::de::Error::custom("probe"))
            }
            serde::forward_to_deserialize_any! {
                bool i8 i16 i32 i64 u8 u16 u32 u64 f32 f64 char str string bytes
                byte_buf option unit unit_struct newtype_struct seq tuple
                tuple_struct map enum identifier ignored_any
            }
        }
        let grab = Grab(Default::default());
        // The probe always errors; only the captured field list is of interest.
        let _ = FileConfig::deserialize(&grab);
        // Equality is order-sensitive: KEYS must list fields in the same order
        // as the captured list.
        assert_eq!(grab.0.into_inner(), FileConfig::KEYS);
    }

    /// Unrecognised keys do not fail parsing and do not block known keys.
    #[test]
    fn unknown_keys_are_ignored() {
        let file = FileConfig::parse("bogus_key = 1\nmax_skills = 3").unwrap();
        let mut cfg = Config::default();
        file.apply(&mut cfg);
        assert_eq!(cfg.max_skills, 3);
    }

    /// An unrecognised enum string parses fine but leaves the default in place.
    #[test]
    fn bad_enum_string_keeps_default() {
        let file = FileConfig::parse(r#"inject_mode = "nonsense""#).unwrap();
        let mut cfg = Config::default();
        file.apply(&mut cfg);
        assert_eq!(cfg.inject_mode, InjectMode::Directive); // unchanged
    }

    /// Calibrating to the bag-of-words embedder overwrites both thresholds with
    /// 0.30 / 0.15, whatever they were before.
    #[test]
    fn calibrate_to_bow_uses_trait_defaults() {
        // The bag-of-words embedder doesn't override the trait defaults.
        let mut cfg = Config {
            min_similarity: 0.99,
            score_margin: 0.99,
            ..Default::default()
        };
        cfg.calibrate_to(&BowEmbedder::new());
        assert_eq!(cfg.min_similarity, 0.30);
        assert_eq!(cfg.score_margin, 0.15);
    }
}