ski/config.rs
1//! Runtime configuration. Compiled defaults ([`Config::base`]), overlaid by an
2//! optional user file (`~/.config/ski/config.toml`, see [`FileConfig`]) loaded
3//! through [`Config::load`]. The file is the escape hatch: silence a noisy skill
4//! with `deny`, pin `rerank_min`, widen `max_skills`, etc. without a rebuild.
5
6use crate::embed::Embedder;
7use crate::hook::Host;
8use serde::Deserialize;
9use std::path::{Path, PathBuf};
10
/// Delivery mechanism for a skill that matched the prompt.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum InjectMode {
    /// Point the model at the relevant skill and leave loading the file to it.
    /// Preserves model agency; this is the v1 default.
    Directive,
    /// Place the `SKILL.md` body directly into the context.
    Body,
}
20
/// How forcefully a `directive`-mode injection is worded.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Strength {
    /// Decide per host (Claude -> soft, opencode -> hard).
    Auto,
    /// A nudge; sufficient when the native chooser is strong.
    Soft,
    /// An imperative, meant for weak local choosers.
    Hard,
}
31
32#[derive(Debug, Clone)]
33pub struct Config {
34 /// Embedding model id. Recognized by the fastembed backend; otherwise the
35 /// offline bag-of-words backend is used regardless of this value.
36 pub model: String,
37 /// Minimum hybrid score for a skill to be eligible for injection.
38 pub min_similarity: f32,
39 /// Max gap below the single best-scoring skill a skill may fall and still be
40 /// injected. Suppresses the weak tail: when the top match is strong, only
41 /// near-peers ride along; when only weak matches exist (or the leader was
42 /// already injected this session), nothing clears the gate. Tuned alongside
43 /// `min_similarity` per embedder.
44 pub score_margin: f32,
45 /// Max skills injected per prompt.
46 pub max_skills: usize,
47 /// Max total injected characters (budget; enforced in the hook path).
48 pub char_budget: usize,
49 /// Added to a skill's score per matching keyword.
50 pub keyword_boost: f32,
51 /// Added to a skill's score per matched trigger phrase (see
52 /// [`crate::rank::phrase_score`]). Higher than `keyword_boost`: a full
53 /// multi-token phrase match is stronger, higher-precision evidence than a
54 /// single keyword token.
55 pub phrase_boost: f32,
56 /// Filesystem roots scanned for `SKILL.md` files.
57 pub roots: Vec<PathBuf>,
58 /// How matched skills are injected.
59 pub inject_mode: InjectMode,
60 /// Forcefulness of directive-mode injections.
61 pub directive_strength: Strength,
62 /// Skill ids never auto-injected.
63 pub deny: Vec<String>,
64 /// Skill ids injected whenever a keyword hits, even below `min_similarity`.
65 pub force: Vec<String>,
66
67 // --- Query-side context enrichment (see `crate::rank::rank_all_ctx`). A vague
68 // follow-up prompt is disambiguated by signals from the turns before it. Two
69 // channels share the recent-prompt window (`context_depth`):
70 // * the file-type channel (`file_boost`) — a document file named in the prompt
71 // or a recent turn (`.xlsx`, `.pdf`, ...) boosts its skill. High-precision
72 // and **on by default**: it adds no false-inject on any eval corpus and
73 // doubles recall on multi-turn document follow-ups.
74 // * the dense blend (`context_weight`) — a recency-weighted context vector
75 // mixed into stage-1. **Off by default**: it lifts multi-turn recall but
76 // admits a topic-switch false-inject no scalar floor separates from genuine
77 // vague follow-ups. Set `context_weight > 0` to opt in. ---
78 /// How many recent prompts to retain as conversational context (0 = disabled).
79 pub context_depth: usize,
80 /// Max weight the context channel can add to a skill's score. The *effective*
81 /// weight scales from this (a fully vague prompt) down to 0 (a confident,
82 /// specific prompt) — see [`crate::rank::context_weight`]. Cosine-space, tuned
83 /// per embedder; 0.0 disables the blend.
84 pub context_weight: f32,
85 /// Prompt best-self-cosine at/below which a prompt counts as *fully* vague
86 /// (context applied at full `context_weight`).
87 pub vague_lo: f32,
88 /// Prompt best-self-cosine at/above which a prompt counts as confident
89 /// (context suppressed entirely). Between `vague_lo` and this, context scales
90 /// linearly.
91 pub vague_hi: f32,
92 /// Score added to a skill when a file of its type is referenced in the prompt
93 /// or recent context (e.g. a `.xlsx` boosts `xlsx`; see
94 /// [`crate::context::file_ids`]). High-precision and *not* vagueness-gated — a
95 /// named file is unambiguous. 0.0 disables the channel.
96 pub file_boost: f32,
97 /// Score added to a skill whose ecosystem matches the working directory's
98 /// project manifests or a code file referenced in the conversation (a
99 /// `uv.lock` implies the *uv*/*python* terms, a named `etl.py` implies
100 /// *python*, ...; see [`crate::context::project_terms`] /
101 /// [`crate::context::code_terms`]). Terms resolve dynamically against the
102 /// installed library ([`crate::context::skills_for_terms`]), so the channel
103 /// surfaces whatever uv/rust/go skill the user actually has, by any name.
104 /// Unlike a named file, this is an *ambient* signal present on every turn, so
105 /// it is the weakest channel and is gated on the skill's own cosine sitting
106 /// within [`crate::rank::PROJECT_GATE_SLACK`] of `min_similarity` in
107 /// [`crate::rank::rank_all_ctx`] — it lifts a near-plausible ecosystem skill
108 /// over the floor (deliberately recall-leaning: the model ignores a surfaced
109 /// skill it doesn't need, and per-session dedup caps the cost at one showing)
110 /// but never rescues a clearly-irrelevant one. 0.0 disables the channel.
111 pub project_boost: f32,
112
113 // --- Stage-2 reranking (see `crate::rerank`). The thresholds below are on the
114 // cross-encoder's logit scale, unrelated to the cosine thresholds above, and
115 // are *not* touched by `calibrate_to`. ---
116 /// Stage-1 score below which a prompt is treated as having no relevant skill,
117 /// so the (costly) reranker is skipped entirely.
118 pub recall_floor: f32,
119 /// Stage-1 score above which the top match may be a confident lone winner.
120 pub high_conf: f32,
121 /// Minimum stage-1 gap from the top match to the runner-up for the top to
122 /// count as a *lone* winner (and thus skip reranking).
123 pub clear_gap: f32,
124 /// How many stage-1 candidates are handed to the reranker.
125 pub rerank_top_k: usize,
126 /// Minimum reranker logit for a skill to be injected.
127 pub rerank_min: f32,
128 /// Max reranker-logit gap below the best reranked skill for a peer to ride along.
129 pub rerank_margin: f32,
130
131 /// Confidence (`[0,1]`) at/above which a *lone* near-certain match is escalated
132 /// from a directive pointer to a full body inject — the `SKILL.md` is inlined
133 /// directly so the model can't skip the Skill-tool round-trip. Only fires in
134 /// `inject_mode = directive` and only when exactly one skill is selected (two
135 /// co-relevant peers mean we are *less* certain, so they stay directives). Set
136 /// deliberately high: in practice this is reached only by a cross-encoder-
137 /// confirmed match (the cosine→confidence map caps below it for bge), so a
138 /// fluky stage-1 hit never triggers a body dump. Raise above `1.0` to disable.
139 pub body_inject_min: f32,
140
141 // --- Stage-1.5 lexical channel (see `crate::lexical`). BM25 over the full skill
142 // description, a high-precision fast-path that injects a *dominant* lexical
143 // winner directly, skipping the reranker — it rescues indirect prompts whose
144 // discriminating vocabulary lives in the description prose but whose bi-encoder
145 // cosine is muddy and whose reranker logit falls below the abstention floor.
146 // Only fires when stage-1 has no confident lone dense winner. These thresholds
147 // are on BM25's own scale, unrelated to the cosine/logit thresholds above and
148 // untouched by `calibrate_to`. ---
149 /// Minimum absolute BM25 score for the top description match to be a lexical
150 /// winner. `<= 0` disables the channel entirely.
151 pub lexical_min: f32,
152 /// Minimum BM25 gap from the top description match to the runner-up for the top
153 /// to count as *dominant* (and thus inject directly, skipping the reranker). The
154 /// margin is what keeps the fast-path high-precision: a cluster of near-equal
155 /// descriptions abstains and defers to the reranker.
156 pub lexical_margin: f32,
157
158 /// Append opt-in JSONL telemetry events (see [`crate::telemetry`]). Off by
159 /// default. Enabled by this field *or* a truthy `SKI_TELEMETRY` env var —
160 /// either one turns it on, so the env var still works without a config file.
161 pub telemetry: bool,
162}
163
164impl Config {
165 /// Adopt the active embedder's score thresholds. Cosine distributions are a
166 /// property of the embedding space, not user preference, so `min_similarity`
167 /// and `score_margin` follow the embedder that actually ran (bge vs the
168 /// offline bag-of-words fallback). Other fields are left untouched.
169 pub fn calibrate_to(&mut self, embedder: &dyn Embedder) {
170 self.min_similarity = embedder.min_similarity();
171 self.score_margin = embedder.score_margin();
172 }
173
174 /// Config scoped to `host`: discovery `roots` (and, via
175 /// [`crate::paths::index_path`], the on-disk index) cover only that host's
176 /// skill library. Keeps an injected skill name resolvable in the host that
177 /// receives it — a Claude-only id never injects into opencode and vice versa.
178 pub fn for_host(host: Host) -> Self {
179 Self {
180 roots: host_roots(host),
181 ..Self::base()
182 }
183 }
184
185 /// Host-scoped config with the user file ([`FileConfig`]) overlaid, returned
186 /// alongside the parsed file. The file is returned so a caller that calibrates
187 /// can re-assert the cosine pins afterward: [`Config::calibrate_to`] overwrites
188 /// `min_similarity`/`score_margin` from the embedder and would otherwise clobber
189 /// a user-set value. Callers that never calibrate can ignore the [`FileConfig`].
190 pub fn load(host: Host) -> (Self, FileConfig) {
191 let file = FileConfig::load();
192 let mut cfg = Self::for_host(host);
193 file.apply(&mut cfg);
194 (cfg, file)
195 }
196
197 /// Every field except `roots`, which [`Config::for_host`] fills per host.
198 fn base() -> Self {
199 Self {
200 model: "bge-small-en-v1.5".into(),
201 min_similarity: 0.30,
202 score_margin: 0.15,
203 max_skills: 2,
204 char_budget: 6000,
205 keyword_boost: 0.15,
206 phrase_boost: 0.20,
207 roots: Vec::new(), // overwritten by `for_host`.
208 inject_mode: InjectMode::Directive,
209 directive_strength: Strength::Auto,
210 deny: Vec::new(),
211 force: Vec::new(),
212 // The file-type channel is on by default (high-precision, zero added
213 // false-inject across every eval corpus). The dense blend stays off
214 // (`context_weight 0.0`): it lifts multi-turn recall but admits a
215 // topic-switch false-inject no scalar floor separates. `context_depth`
216 // keeps the rolling prompt window the file channel needs to see a file
217 // attached a turn or two back; `vague_lo`/`hi` are bge-cosine-space
218 // gates, live only for the dense blend once it is opted into.
219 context_depth: 3,
220 context_weight: 0.0,
221 vague_lo: 0.55,
222 vague_hi: 0.65,
223 file_boost: 0.3,
224 // Ambient project signal **on by default** now that terms resolve
225 // dynamically against the installed library (the old hardcoded-id map
226 // was inert outside one specific library). Sized to matter but not
227 // dominate: enough to lift a near-floor ecosystem skill over the line
228 // (with rank::PROJECT_GATE_SLACK) or into the leader's margin, well
229 // below the file channel's 0.3 (a named document is far stronger
230 // evidence than ambient workspace type).
231 project_boost: 0.15,
232 // Reranker gate + thresholds, calibrated against the JINA turbo
233 // reranker (see `examples/rerank_probe`). Stage-1 top-1 accuracy: 76%
234 // stage-1 only -> 88% with reranking.
235 //
236 // `recall_floor` skips the reranker when nothing is plausibly relevant.
237 // bge is anisotropic (unrelated prompts still cosine ~0.5), which
238 // compresses the usable range: 0.50 skips clearly-irrelevant prompts
239 // without dropping real-but-weak matches. `high_conf` is effectively
240 // disabled (2.0): a confidence-based skip measurably *hurt* accuracy,
241 // because the bi-encoder is confidently wrong on the confusable pairs
242 // the reranker exists to fix. It is retained as a tunable, not removed.
243 //
244 // `rerank_min` is tuned on the realistic ~120-skill index (17 anthropic
245 // + highest-installed community skills from skills.sh; see
246 // `tests/data/popular_skills_prompts.tsv`) — the artificially narrow
247 // 17-skill anthropic library is *not* the tuning authority (its indirect
248 // prompts have no good match and score like noise, which would pull the
249 // floor too low). The earlier -2.5/-1.5 floors admitted a band of
250 // negative-logit candidates — sigmoid(-1.5) ~= 0.18 confidence — that the
251 // cross-encoder is itself signalling are *not* matches. Live telemetry
252 // confirmed it: of the recommendations injected in the 0.18-0.24
253 // confidence band ("commit and push" -> caveman-commit, "continue" ->
254 // pickup, "/goal ..." -> skill-creator), essentially none were ever acted
255 // on. Injecting them is the dominant usage-rate drag and erodes the whole
256 // channel's credibility (the model learns to ignore *every*
257 // SkillRecommendation, including the strong ones), so the floor abstains
258 // on them. -1.1 (sigmoid ~= 0.25) is the precise recall-preserving
259 // precision maximum on the realistic corpus: a strict improvement over
260 // -1.5 there (recall held at 41/43 = 95%, false injects 3 -> 1 of 64);
261 // tightening past it (-1.0) starts dropping real positives. The cost is
262 // some recall on *indirect* prompts whose logit sits in -1.5..-1.1 — a
263 // zone where genuine weak matches and live noise overlap and no scalar
264 // separates them (cross-encoder pairs score independently of the index) —
265 // but those are largely cases the host's own skill chooser covers anyway.
266 // Larger embedders/rerankers (bge-base, jina-v2) tie this at higher cost,
267 // so the gate is the lever. Sweep via `SKI_RERANK_MIN` in `examples/eval`.
268 //
269 // `rerank_min` alone can't catch every false inject: on a richer corpus
270 // a no-match prompt's reranked logit interleaves with genuine weak
271 // matches, so no scalar separates them. The complementary lever is the
272 // stage-1 *agreement* gate in `rerank::passes` — a reranked skill's
273 // bi-encoder score must sit within a small slack of `min_similarity`,
274 // i.e. the reranker may reorder the retrieved-relevant set but not
275 // resurrect a skill stage-1 judged irrelevant. That cut false injects a
276 // further ~67% (3 -> 1 on the 52-negative realistic corpus) at no extra
277 // compute, holding recall at 95%. See `rerank::AGREEMENT_SLACK`, `examples/eval`.
278 //
279 // 2026-06-23 REVERSAL — floor restored to -2.5. The precision tuning above
280 // optimised the wrong objective. It rested on live telemetry showing the
281 // 0.18-0.24 band was "never acted on" — but those injects were *also*
282 // phrased timidly ("consider invoking it", the Low confidence-band verb in
283 // `inject::directive_block`), so the signal conflated *wrong skill* with
284 // *right skill, timid verb*. A controlled probe against the real host
285 // (`claude -p`, see `[[ski-host-recall-gap]]`) separated them: the strong
286 // host hand-rolls instead of invoking on indirect-task prompts (5/12 miss,
287 // every miss a Bash hand-roll), and the SAME right skill is ignored 0/3
288 // when phrased "consider…" but invoked 2/3..3/3 when phrased "invoke it
289 // now, before you respond." The two over-tunes (high floor + timid verb)
290 // interacted: lowering the floor admits the indirect matches, the firm verb
291 // (now applied to every inject, see `inject.rs`) makes the host act on them.
292 // FP cost stays low because a strong host *ignores* false injects even when
293 // phrased firmly (3/3 in the probe) — so on a strong host ski's job is
294 // RECALL of host-misses, where false positives are nearly free, not the
295 // precision the old eval rewarded. -2.5 recovers docx/brand/xlsx misses;
296 // pdf/pptx remain a small-model retrieval ceiling no floor can fix.
297 recall_floor: 0.50,
298 high_conf: 2.0,
299 clear_gap: 0.12,
300 rerank_top_k: 12,
301 rerank_min: -2.5,
302 rerank_margin: 2.0,
303 // Body-escalate only a lone, cross-encoder-confirmed near-certain match
304 // (sigmoid(2.45) ~= 0.92). High enough that a stage-1 cosine hit — whose
305 // confidence maps to <= ~0.85 for bge — never triggers it, so only the
306 // reranker's strongest verdicts inline the full SKILL.md.
307 body_inject_min: 0.92,
308 // Lexical fast-path: **off by default** (like `project_boost` / the dense
309 // blend) — the eval gives it no free win on the realistic tuning corpus.
310 // Measured across `examples/eval` (sweep `SKI_LEXICAL_MIN` /
311 // `SKI_LEXICAL_MARGIN`):
312 // * It clearly helps *indirect task* prompts whose operation is spelled
313 // out in the description but whose cosine is muddy and reranker logit
314 // sub-floor — the document/design corpus jumps +4..+8 recall ("compute
315 // the totals and make a chart in my spreadsheet" -> xlsx; "OCR this
316 // scanned pdf" -> pdf; "looping reaction gif for slack" ->
317 // slack-gif-creator). These are dense+rerank abstentions.
318 // * It *hurts* on knowledge/explainer negatives that merely name a
319 // technology ("what year was rust released" -> rust-async-patterns;
320 // "difference between a docker container and a VM" -> docker-expert): a
321 // rare term dominantly hits one description and false-injects. The
322 // 2-term overlap guard (`lexical::MIN_TERM_OVERLAP`) does not catch them.
323 // No single threshold is a free win on the realistic corpus: the setting
324 // that captures the big indirect-recall gain (min ~4, margin ~2) adds ~3
325 // false injects there, while the do-no-harm setting (min ~6, margin ~5) is
326 // near-neutral. Recommended opt-in for indirect-heavy libraries:
327 // `lexical_min = 4`, `lexical_margin = 2`.
328 lexical_min: 0.0,
329 lexical_margin: 0.0,
330 telemetry: false,
331 }
332 }
333}
334
335impl Default for Config {
336 /// The Claude-scoped config. `ski index`/`why` (and the eval harness) default
337 /// here; the hot paths build [`Config::for_host`] from their `--host` flag.
338 fn default() -> Self {
339 Self::for_host(Host::Claude)
340 }
341}
342
343/// User overrides parsed from `~/.config/ski/config.toml`. Every field is
344/// optional; an absent field — or an absent/malformed file — leaves the compiled
345/// default untouched. Parsing fails open: a malformed file yields an empty
346/// overlay (all defaults) rather than an error, so a bad config can never block
347/// injection. Unknown keys are ignored (a typo drops one field, not the file).
348#[derive(Debug, Clone, Default, Deserialize)]
349#[serde(default)]
350pub struct FileConfig {
351 pub model: Option<String>,
352 pub min_similarity: Option<f32>,
353 pub score_margin: Option<f32>,
354 pub max_skills: Option<usize>,
355 pub char_budget: Option<usize>,
356 pub keyword_boost: Option<f32>,
357 pub phrase_boost: Option<f32>,
358 pub roots: Option<Vec<PathBuf>>,
359 pub inject_mode: Option<String>,
360 pub directive_strength: Option<String>,
361 pub deny: Option<Vec<String>>,
362 pub force: Option<Vec<String>>,
363 pub context_depth: Option<usize>,
364 pub context_weight: Option<f32>,
365 pub vague_lo: Option<f32>,
366 pub vague_hi: Option<f32>,
367 pub file_boost: Option<f32>,
368 pub project_boost: Option<f32>,
369 pub recall_floor: Option<f32>,
370 pub high_conf: Option<f32>,
371 pub clear_gap: Option<f32>,
372 pub rerank_top_k: Option<usize>,
373 pub rerank_min: Option<f32>,
374 pub rerank_margin: Option<f32>,
375 pub body_inject_min: Option<f32>,
376 pub lexical_min: Option<f32>,
377 pub lexical_margin: Option<f32>,
378 pub telemetry: Option<bool>,
379}
380
381impl FileConfig {
382 /// Every key the overlay understands, in struct order. Used by `ski doctor`
383 /// to flag typos: unknown keys are silently ignored at runtime (by design),
384 /// so a misspelled `deny` never applies and nothing says why. Adding a field
385 /// to the struct? Add its key here (the `keys_cover_the_struct` test below
386 /// catches a miss).
387 pub const KEYS: &'static [&'static str] = &[
388 "model",
389 "min_similarity",
390 "score_margin",
391 "max_skills",
392 "char_budget",
393 "keyword_boost",
394 "phrase_boost",
395 "roots",
396 "inject_mode",
397 "directive_strength",
398 "deny",
399 "force",
400 "context_depth",
401 "context_weight",
402 "vague_lo",
403 "vague_hi",
404 "file_boost",
405 "project_boost",
406 "recall_floor",
407 "high_conf",
408 "clear_gap",
409 "rerank_top_k",
410 "rerank_min",
411 "rerank_margin",
412 "body_inject_min",
413 "lexical_min",
414 "lexical_margin",
415 "telemetry",
416 ];
417
418 /// Parse the user config, or an empty (all-default) overlay when the file is
419 /// missing. Still fail-open on a malformed file (never blocks injection),
420 /// but — unlike a missing file, which is a normal, silent no-op — a
421 /// *present but unparseable* file means the user's overrides (`deny`, a
422 /// tuned `rerank_min`, ...) are being silently ignored, which is worth a
423 /// one-line stderr warning (with the parse location) so they can find out
424 /// without already knowing to set `SKI_DEBUG`.
425 pub fn load() -> Self {
426 let path = crate::paths::config_path();
427 let Ok(raw) = std::fs::read_to_string(&path) else {
428 return Self::default(); // no file: not an error, nothing to say.
429 };
430 Self::parse(&raw).unwrap_or_else(|e| {
431 let msg = e.to_string();
432 let first = msg.lines().find(|l| !l.trim().is_empty()).unwrap_or("");
433 eprintln!(
434 "ski: {} is not valid TOML ({first}); ignoring it and using defaults",
435 path.display()
436 );
437 Self::default()
438 })
439 }
440
441 /// Pure TOML parse, shared by [`load`](Self::load), `ski doctor` (which
442 /// reports the error loudly instead of failing open), and tests. `Err` on
443 /// malformed input.
444 pub(crate) fn parse(raw: &str) -> Result<Self, toml::de::Error> {
445 toml::from_str(raw)
446 }
447
448 /// Overlay every present field onto `cfg`. `roots` is ignored while the
449 /// `SKI_ROOTS` env override is active (env wins, for evals/tooling). Unknown
450 /// `inject_mode`/`directive_strength` strings are ignored, keeping the default.
451 pub fn apply(&self, cfg: &mut Config) {
452 if let Some(v) = &self.model {
453 cfg.model = v.clone();
454 }
455 self.apply_cosine(cfg);
456 if let Some(v) = self.max_skills {
457 cfg.max_skills = v;
458 }
459 if let Some(v) = self.char_budget {
460 cfg.char_budget = v;
461 }
462 if let Some(v) = self.keyword_boost {
463 cfg.keyword_boost = v;
464 }
465 if let Some(v) = self.phrase_boost {
466 cfg.phrase_boost = v;
467 }
468 if let Some(v) = &self.roots {
469 if std::env::var_os("SKI_ROOTS").is_none() {
470 cfg.roots = v.clone();
471 }
472 }
473 if let Some(m) = self.inject_mode.as_deref().and_then(parse_inject_mode) {
474 cfg.inject_mode = m;
475 }
476 if let Some(s) = self.directive_strength.as_deref().and_then(parse_strength) {
477 cfg.directive_strength = s;
478 }
479 if let Some(v) = &self.deny {
480 cfg.deny = v.clone();
481 }
482 if let Some(v) = &self.force {
483 cfg.force = v.clone();
484 }
485 if let Some(v) = self.context_depth {
486 cfg.context_depth = v;
487 }
488 if let Some(v) = self.context_weight {
489 cfg.context_weight = v;
490 }
491 if let Some(v) = self.vague_lo {
492 cfg.vague_lo = v;
493 }
494 if let Some(v) = self.vague_hi {
495 cfg.vague_hi = v;
496 }
497 if let Some(v) = self.file_boost {
498 cfg.file_boost = v;
499 }
500 if let Some(v) = self.project_boost {
501 cfg.project_boost = v;
502 }
503 if let Some(v) = self.recall_floor {
504 cfg.recall_floor = v;
505 }
506 if let Some(v) = self.high_conf {
507 cfg.high_conf = v;
508 }
509 if let Some(v) = self.clear_gap {
510 cfg.clear_gap = v;
511 }
512 if let Some(v) = self.rerank_top_k {
513 cfg.rerank_top_k = v;
514 }
515 if let Some(v) = self.rerank_min {
516 cfg.rerank_min = v;
517 }
518 if let Some(v) = self.rerank_margin {
519 cfg.rerank_margin = v;
520 }
521 if let Some(v) = self.body_inject_min {
522 cfg.body_inject_min = v;
523 }
524 if let Some(v) = self.lexical_min {
525 cfg.lexical_min = v;
526 }
527 if let Some(v) = self.lexical_margin {
528 cfg.lexical_margin = v;
529 }
530 if let Some(v) = self.telemetry {
531 cfg.telemetry = v;
532 }
533 }
534
535 /// Re-assert just the cosine thresholds. [`Config::calibrate_to`] overwrites
536 /// `min_similarity`/`score_margin` from the embedder, so a user pin must be
537 /// applied *after* calibration to survive.
538 pub fn apply_cosine(&self, cfg: &mut Config) {
539 if let Some(v) = self.min_similarity {
540 cfg.min_similarity = v;
541 }
542 if let Some(v) = self.score_margin {
543 cfg.score_margin = v;
544 }
545 }
546}
547
548fn parse_inject_mode(s: &str) -> Option<InjectMode> {
549 match s.trim().to_ascii_lowercase().as_str() {
550 "directive" => Some(InjectMode::Directive),
551 "body" => Some(InjectMode::Body),
552 _ => None,
553 }
554}
555
556fn parse_strength(s: &str) -> Option<Strength> {
557 match s.trim().to_ascii_lowercase().as_str() {
558 "auto" => Some(Strength::Auto),
559 "soft" => Some(Strength::Soft),
560 "hard" => Some(Strength::Hard),
561 _ => None,
562 }
563}
564
565/// Discovery roots for `host`. `SKI_ROOTS` (colon-separated) overrides for any
566/// host — it lets evals/tools scope discovery to one skill library without a
567/// config file (e.g. `SKI_ROOTS=~/.claude/plugins/marketplaces/anthropic-agent-skills`).
568fn host_roots(host: Host) -> Vec<PathBuf> {
569 if let Some(raw) = std::env::var_os("SKI_ROOTS") {
570 let roots: Vec<PathBuf> = std::env::split_paths(&raw)
571 .filter(|p| !p.as_os_str().is_empty())
572 .collect();
573 if !roots.is_empty() {
574 return roots;
575 }
576 }
577 match host {
578 Host::Claude => {
579 let mut v = Vec::new();
580 if let Some(h) = std::env::var_os("HOME").map(PathBuf::from) {
581 v.push(h.join(".claude/skills"));
582 v.push(h.join(".claude/plugins"));
583 }
584 v.push(PathBuf::from(".claude/skills"));
585 v
586 }
587 Host::Opencode => opencode_roots(),
588 }
589}
590
591/// opencode declares its skill directories in `opencode.json` (`skills.paths`),
592/// not a fixed directory, so its roots are read from the global config rather
593/// than guessed. Absolute paths are used as-is; relative paths resolve against
594/// the process cwd, which the hook subprocess inherits from opencode's project
595/// dir. Project-local `opencode.json` overrides are a later milestone (the hook
596/// does not yet consume the event's `cwd`).
597fn opencode_roots() -> Vec<PathBuf> {
598 let Some(cfg_path) = opencode_config_path() else {
599 return Vec::new();
600 };
601 let Ok(raw) = std::fs::read_to_string(&cfg_path) else {
602 return Vec::new();
603 };
604 parse_opencode_paths(&raw, std::env::current_dir().ok().as_deref())
605}
606
/// Location of opencode's global config (`$XDG_CONFIG_HOME/opencode/opencode.json`,
/// default `~/.config/opencode/opencode.json`).
///
/// An environment variable that is set but *empty* is treated as unset. For
/// `XDG_CONFIG_HOME` this is what the XDG Base Directory spec requires (fall
/// back to `$HOME/.config`); for `HOME` it avoids building a cwd-relative
/// `.config/...` path that could pick up an unrelated file. Returns `None` when
/// neither variable provides a usable base directory.
fn opencode_config_path() -> Option<PathBuf> {
    // Read a directory from the environment, discarding empty values.
    let dir_from_env = |name: &str| {
        std::env::var_os(name)
            .filter(|value| !value.is_empty())
            .map(PathBuf::from)
    };
    let base = dir_from_env("XDG_CONFIG_HOME")
        .or_else(|| dir_from_env("HOME").map(|home| home.join(".config")))?;
    Some(base.join("opencode").join("opencode.json"))
}
615
616/// Pull `skills.paths` out of an opencode config blob, resolving relative entries
617/// against `cwd`. A missing key or malformed JSON yields no roots (fail open: no
618/// injection rather than a wrong-host one). Pure core of [`opencode_roots`].
619fn parse_opencode_paths(raw: &str, cwd: Option<&Path>) -> Vec<PathBuf> {
620 let Ok(json) = serde_json::from_str::<serde_json::Value>(raw) else {
621 return Vec::new();
622 };
623 let Some(paths) = json
624 .get("skills")
625 .and_then(|s| s.get("paths"))
626 .and_then(|p| p.as_array())
627 else {
628 return Vec::new();
629 };
630 paths
631 .iter()
632 .filter_map(|v| v.as_str())
633 .filter(|s| !s.is_empty())
634 .map(|s| {
635 let p = PathBuf::from(s);
636 match cwd {
637 Some(cwd) if p.is_relative() => cwd.join(p),
638 _ => p,
639 }
640 })
641 .collect()
642}
643
#[cfg(test)]
mod tests {
    use super::*;
    use crate::embed::{bow::BowEmbedder, EmbedKind, Embedder};

    /// Dense-embedder stand-in that reports its own (non-default) thresholds.
    struct StubEmbedder;

    impl Embedder for StubEmbedder {
        fn id(&self) -> String {
            String::from("stub")
        }
        fn embed(&self, _: &[String], _: EmbedKind) -> anyhow::Result<Vec<Vec<f32>>> {
            Ok(Vec::new())
        }
        fn min_similarity(&self) -> f32 {
            0.64
        }
        fn score_margin(&self) -> f32 {
            0.12
        }
    }

    /// Parse `raw` as a user file and overlay it onto the default config.
    fn overlaid(raw: &str) -> Config {
        let file = FileConfig::parse(raw).unwrap();
        let mut cfg = Config::default();
        file.apply(&mut cfg);
        cfg
    }

    #[test]
    fn calibrate_adopts_embedder_thresholds() {
        let mut cfg = Config::default();
        cfg.calibrate_to(&StubEmbedder);
        assert_eq!(cfg.min_similarity, 0.64);
        assert_eq!(cfg.score_margin, 0.12);
    }

    #[test]
    fn claude_roots_are_claude_scoped() {
        // An outer `SKI_ROOTS` override shadows both hosts, so there is nothing
        // host-specific to check while it is set.
        if std::env::var_os("SKI_ROOTS").is_some() {
            return;
        }
        let roots = host_roots(Host::Claude);
        let mentions = |needle: &str| {
            roots
                .iter()
                .any(|root| root.to_string_lossy().contains(needle))
        };
        assert!(mentions(".claude/skills"));
        assert!(!mentions("opencode"));
    }

    #[test]
    fn opencode_paths_parsed_and_resolved() {
        let json = r#"{"skills":{"paths":[".opencode/skills","/abs/repo"],"urls":[]}}"#;
        let expected = vec![
            PathBuf::from("/proj/.opencode/skills"),
            PathBuf::from("/abs/repo"),
        ];
        assert_eq!(parse_opencode_paths(json, Some(Path::new("/proj"))), expected);
    }

    #[test]
    fn opencode_paths_tolerate_missing_key_and_bad_json() {
        for raw in ["{}", r#"{"skills":{}}"#, "not json"].iter() {
            assert!(parse_opencode_paths(raw, None).is_empty());
        }
    }

    #[test]
    fn file_overlay_applies_present_fields_only() {
        let raw = r#"
            max_skills = 5
            rerank_min = -0.5
            deny = ["noisy-skill"]
            inject_mode = "body"
            directive_strength = "hard"
            telemetry = true
        "#;
        let file = FileConfig::parse(raw).unwrap();
        let mut cfg = Config::default();
        let orig_model = cfg.model.clone();
        let orig_budget = cfg.char_budget;
        assert!(!cfg.telemetry); // off by default

        file.apply(&mut cfg);

        assert_eq!(cfg.max_skills, 5);
        assert_eq!(cfg.rerank_min, -0.5);
        assert_eq!(cfg.deny, ["noisy-skill"]);
        assert_eq!(cfg.inject_mode, InjectMode::Body);
        assert_eq!(cfg.directive_strength, Strength::Hard);
        assert!(cfg.telemetry); // enabled via config.toml
        // Fields the file did not mention keep their defaults.
        assert_eq!(cfg.model, orig_model);
        assert_eq!(cfg.char_budget, orig_budget);
    }

    #[test]
    fn cosine_pin_survives_calibration() {
        // A user pin must win even though calibrate_to runs after the overlay.
        let file = FileConfig::parse("min_similarity = 0.80").unwrap();
        let mut cfg = Config::default();
        file.apply(&mut cfg);
        cfg.calibrate_to(&StubEmbedder); // would set 0.64
        file.apply_cosine(&mut cfg); // re-assert the pin
        assert_eq!(cfg.min_similarity, 0.80);
        assert_eq!(cfg.score_margin, 0.12); // unpinned -> embedder value
    }

    #[test]
    fn malformed_file_is_empty_overlay() {
        // `parse` itself reports the error; turning that into an empty overlay
        // is `load`'s job.
        let parsed = FileConfig::parse("this is not = = toml");
        assert!(parsed.is_err());
    }

    #[test]
    fn load_falls_back_to_defaults_on_malformed_on_disk_file() {
        // FileConfig::load() must still fail open on a *present but unparseable*
        // config.toml (only the stderr warning is new — the caller never sees an
        // error and the result behaves exactly like a missing file).
        let dir = std::env::temp_dir().join(format!("ski-cfg-load-{}", std::process::id()));
        std::fs::create_dir_all(dir.join("ski")).unwrap();
        std::fs::write(dir.join("ski/config.toml"), "max_skills = \"nope\"").unwrap();

        // Point the config lookup at the temp dir just for the `load` call, then
        // put the variable back the way it was.
        let saved = std::env::var_os("XDG_CONFIG_HOME");
        std::env::set_var("XDG_CONFIG_HOME", &dir);
        let file = FileConfig::load();
        if let Some(value) = saved {
            std::env::set_var("XDG_CONFIG_HOME", value);
        } else {
            std::env::remove_var("XDG_CONFIG_HOME");
        }

        let mut cfg = Config::default();
        let default_max_skills = cfg.max_skills;
        file.apply(&mut cfg);
        assert_eq!(cfg.max_skills, default_max_skills); // the bad field never applied
        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn keys_cover_the_struct() {
        // `FileConfig::KEYS` is hand-maintained for `ski doctor`'s typo check;
        // this pins it to the field list serde derives from the struct itself,
        // so adding a field without updating KEYS fails here instead of silently
        // weakening the doctor check. The probe deserializer only intercepts
        // `deserialize_struct` to grab serde's FIELDS array, then bails.
        struct Grab(std::cell::RefCell<Vec<&'static str>>);

        impl<'de> serde::Deserializer<'de> for &Grab {
            type Error = serde::de::value::Error;

            fn deserialize_any<V: serde::de::Visitor<'de>>(
                self,
                _: V,
            ) -> Result<V::Value, Self::Error> {
                Err(serde::de::Error::custom("probe"))
            }

            fn deserialize_struct<V: serde::de::Visitor<'de>>(
                self,
                _: &'static str,
                fields: &'static [&'static str],
                _: V,
            ) -> Result<V::Value, Self::Error> {
                self.0.borrow_mut().extend_from_slice(fields);
                Err(serde::de::Error::custom("probe"))
            }

            serde::forward_to_deserialize_any! {
                bool i8 i16 i32 i64 u8 u16 u32 u64 f32 f64 char str string bytes
                byte_buf option unit unit_struct newtype_struct seq tuple
                tuple_struct map enum identifier ignored_any
            }
        }

        let probe = Grab(Default::default());
        // The probe always errors; only the recorded field list matters.
        let _ = FileConfig::deserialize(&probe);
        assert_eq!(probe.0.into_inner(), FileConfig::KEYS);
    }

    #[test]
    fn unknown_keys_are_ignored() {
        let cfg = overlaid("bogus_key = 1\nmax_skills = 3");
        assert_eq!(cfg.max_skills, 3);
    }

    #[test]
    fn bad_enum_string_keeps_default() {
        let cfg = overlaid(r#"inject_mode = "nonsense""#);
        assert_eq!(cfg.inject_mode, InjectMode::Directive); // unchanged
    }

    #[test]
    fn calibrate_to_bow_uses_trait_defaults() {
        // The bag-of-words embedder doesn't override the trait defaults.
        let mut cfg = Config::default();
        cfg.min_similarity = 0.99;
        cfg.score_margin = 0.99;
        cfg.calibrate_to(&BowEmbedder::new());
        assert_eq!(cfg.min_similarity, 0.30);
        assert_eq!(cfg.score_margin, 0.15);
    }
}
840}