1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
//! `ski` CLI. Milestones 1–3 implement `index`, `why`, `hook`, `observe`, and
//! `session-start`; the binary also exposes `doctor`, `init`, `history`,
//! `status`, `clear`, and `suggest`.
use anyhow::Result;
use clap::{Parser, Subcommand};
use ski::config::Config;
use ski::embed::{self, EmbedKind};
use ski::hook::{self, Host};
use ski::index::{self, Index};
use ski::{
context, doctor, history, init, lexical, observe, paths, pipeline, rank, rerank, session_start,
skill, status, suggest,
};
// Top-level command-line arguments, parsed in `main` via `Cli::parse()`.
// Maintainer notes here are plain `//` comments on purpose: clap's derive
// turns `///` doc comments into help text, which would change CLI output.
#[derive(Parser)]
#[command(
name = "ski",
version,
about = "skill-inject: local semantic skill auto-injection"
)]
struct Cli {
// The selected subcommand; `main` matches on it to dispatch.
#[command(subcommand)]
cmd: Cmd,
}
// Every subcommand the binary accepts; `main` matches on this enum to dispatch.
// NOTE: clap renders the `///` comments below as `--help` text, so they are
// user-facing output. Maintainer notes in this enum are plain `//` comments.
// Each `host` is taken as a raw `String` and parsed into `Host` by `main`.
#[derive(Subcommand)]
enum Cmd {
/// (Re)build the persistent skill index.
Index {
/// Ignore the existing index and re-embed everything.
#[arg(long)]
rebuild: bool,
/// Which host's skill library to index ('claude' or 'opencode').
#[arg(long, default_value = "claude")]
host: String,
},
/// Rank skills against a prompt and print scores (tuning aid).
Why {
// Collected as separate words; `main` joins them with single spaces.
/// The prompt (all trailing words are joined).
#[arg(required = true)]
prompt: Vec<String>,
/// How many ranked skills to show.
#[arg(long, default_value_t = 10)]
top: usize,
/// Which host's skill library to rank against ('claude' or 'opencode').
#[arg(long, default_value = "claude")]
host: String,
},
// Hidden from help output (`hide = true`); `--host` has no default here.
/// UserPromptSubmit hot-path: decide which skills to inject + emit the
/// host's injection contract. Driven by the hooks, not run by hand.
#[command(hide = true)]
Hook {
#[arg(long)]
host: String,
},
// Hidden from help output (`hide = true`); `--host` has no default here.
/// PostToolUse: record skills the model loaded itself. Driven by the hooks,
/// not run by hand.
#[command(hide = true)]
Observe {
#[arg(long)]
host: String,
},
// Hidden from help output (`hide = true`); `--host` has no default here.
/// SessionStart: incremental reindex + re-arm session state on compaction.
/// Driven by the hooks, not run by hand.
#[command(hide = true)]
SessionStart {
#[arg(long)]
host: String,
},
/// Check the whole install end to end — host wiring, config file, skill
/// discovery, index freshness, model cache, state dirs, plus a live
/// embed+rank smoke test — one line per check, with a concrete fix for
/// anything broken. Everything else in ski fails open (silently); this is
/// the loud "is it working?" answer. Exits 1 if a blocking problem is found.
Doctor {
/// Which host's install to check ('claude' or 'opencode').
#[arg(long, default_value = "claude")]
host: String,
},
/// Install ski's hooks/plugin for a host into your user config (the
/// marketplace-free setup path).
Init {
// Positional argument (no `long`/`short`), unlike the `--host` flags above.
/// Which host to set up ('claude' or 'opencode').
host: String,
/// Install user-wide (required; per-project install is not yet supported).
#[arg(short = 'g', long)]
global: bool,
},
/// Read the opt-in telemetry log (recommendations vs. actual use). Default is
/// the aggregate readout; `--tail N` lists recent calls individually —
/// recommendations (prompt, per-candidate confidence, used?) and self-loads
/// (acted-on-rec vs. recall miss). `--compare` shows ski's ranking vs the
/// native chooser's actual pick per prompt (agreed / near-miss / buried /
/// absent) — where ski could get an edge. Empty unless hooks ran with
/// `SKI_TELEMETRY=1`.
History {
/// List the most recent N events individually (recommendations and
/// self-loads, newest last) instead of the aggregate.
#[arg(long)]
tail: Option<usize>,
/// When listing, only events whose session id contains this substring.
#[arg(long)]
session: Option<String>,
/// Show ski's ranking vs the native chooser's pick per prompt, classified
/// by where ski ranked what the model actually used.
#[arg(long)]
compare: bool,
},
/// Show what ski actually did in your recent conversations — skills it
/// surfaced that the model then invoked (assists), skills it surfaced the
/// model ignored, and skills the model loaded itself while ski stayed silent
/// (recall misses). Reads the per-session ledgers ski writes on every prompt,
/// so it works with no telemetry and no config. The plain-language answer to
/// "is ski helping me?", between `ski doctor` ("is it wired?") and `ski
/// history` (telemetry detail).
Status {
// `main` passes `usize::MAX` to `status::run` instead of `limit` when
// `--all` is set.
/// How many recent conversations to list (aggregate counts still span all).
#[arg(long, default_value_t = 10)]
limit: usize,
/// List every conversation on record, not just the most recent.
#[arg(long)]
all: bool,
},
// Dispatched to `history::clear` in `main`.
/// Wipe per-session dedup state (re-arm injection for testing).
Clear {
/// Also truncate the telemetry log.
#[arg(long)]
telemetry: bool,
},
/// Turn the telemetry log into concrete tuning actions: `force`/keyword
/// suggestions for skills the model keeps self-loading while ski stays
/// silent, and `deny` suggestions for skills ski keeps injecting that are
/// never used. Read-only — nothing is applied automatically. Empty unless
/// hooks ran with telemetry enabled.
Suggest {
/// Which host's index to resolve skills against ('claude' or 'opencode').
#[arg(long, default_value = "claude")]
host: String,
},
}
fn main() -> Result<()> {
// Rust ignores SIGPIPE, so `ski why ... | head` used to panic with a
// broken-pipe backtrace once head closed the pipe. Restore the default
// die-quietly disposition, like other well-behaved CLI tools.
#[cfg(unix)]
unsafe {
libc::signal(libc::SIGPIPE, libc::SIG_DFL);
}
let cli = Cli::parse();
match cli.cmd {
Cmd::Index { rebuild, host } => cmd_index(host.parse::<Host>()?, rebuild),
Cmd::Why { prompt, top, host } => cmd_why(host.parse::<Host>()?, &prompt.join(" "), top),
Cmd::Hook { host } => hook::run(host.parse::<Host>()?),
Cmd::Observe { host } => observe::run(host.parse::<Host>()?),
Cmd::SessionStart { host } => session_start::run(host.parse::<Host>()?),
Cmd::Doctor { host } => doctor::run(host.parse::<Host>()?),
Cmd::Init { host, global } => init::run(host.parse::<Host>()?, global),
Cmd::History {
tail,
session,
compare,
} => history::run(tail, session.as_deref(), compare),
Cmd::Status { limit, all } => status::run(if all { usize::MAX } else { limit }),
Cmd::Clear { telemetry } => history::clear(telemetry),
Cmd::Suggest { host } => suggest::run(host.parse::<Host>()?),
}
}
/// Build the persistent skill index for `host`, save it, and print a summary.
///
/// With `rebuild` set the existing index is not loaded, so nothing from it is
/// handed to `index::build`; otherwise the index on disk (if any) is passed
/// along as the previous state.
///
/// # Errors
/// Propagates failures from constructing the embedder, loading the previous
/// index, building the new index, or saving it.
fn cmd_index(host: Host, rebuild: bool) -> Result<()> {
    let (cfg, _file) = Config::load(host);
    let index_path = paths::index_path(host);
    let discovery = skill::discover_all(&cfg.roots);
    let embedder = embed::build(&cfg.model)?;

    // A load error is surfaced here (unlike `why`, which tolerates it).
    let previous = match rebuild {
        true => None,
        false => Index::load(&index_path)?,
    };

    let idx = index::build(&discovery.skills, embedder.as_ref(), previous.as_ref())?;
    idx.save(&index_path)?;

    let skill_count = idx.skills.len();
    println!(
        "indexed {} skills ({} dims) via '{}' -> {}",
        skill_count,
        idx.dim,
        idx.model,
        index_path.display()
    );
    report_skipped(&discovery.skipped);

    if !idx.skills.is_empty() {
        return Ok(());
    }

    // An empty index is almost always a misconfigured root, so say where we looked.
    let searched = format_roots(&cfg.roots);
    eprintln!(
        "note: no skills found. Discovery roots for this host: {}",
        searched
    );
    eprintln!(" install skills there, or point `roots` in config.toml / SKI_ROOTS at your library.");
    Ok(())
}
/// Report unusable `SKILL.md` files on stderr so a skill that never injects
/// can be diagnosed instead of failing silently.
///
/// Prints a header with the total count, then one `path: reason` line for at
/// most the first `SHOW` entries, then a trailer with the number of entries
/// left out. Prints nothing when `skipped` is empty.
fn report_skipped(skipped: &[(std::path::PathBuf, String)]) {
    const SHOW: usize = 10;

    let total = skipped.len();
    if total == 0 {
        return;
    }

    eprintln!(
        "note: skipped {} SKILL.md file(s) with unusable frontmatter:",
        total
    );
    skipped
        .iter()
        .take(SHOW)
        .for_each(|(path, reason)| eprintln!(" {}: {reason}", path.display()));

    // Zero when everything fit within the cap.
    let hidden = total.saturating_sub(SHOW);
    if hidden > 0 {
        eprintln!(" ... and {} more", hidden);
    }
}
/// Render the discovery roots as one comma-separated string for messages.
///
/// Each path is formatted with `Path::display`; an empty slice yields an
/// empty string.
fn format_roots(roots: &[std::path::PathBuf]) -> String {
    let mut joined = String::new();
    for (position, root) in roots.iter().enumerate() {
        if position > 0 {
            joined.push_str(", ");
        }
        joined.push_str(&root.display().to_string());
    }
    joined
}
/// Rank the skill library of `host` against `prompt` and print the decision
/// the hook would reach, as a tuning aid.
///
/// Output is a header line (winning stage, threshold, prompt), up to `top`
/// ranked rows with their per-channel breakdown (`*` marks rows that would be
/// injected), and a lexical (BM25) detail block when any lexical scores exist.
/// If discovery finds no skills, a single "no skills found" line is printed
/// and the function returns early.
///
/// # Errors
/// Propagates failures from constructing the embedder, building the index, or
/// embedding the prompt. Loading and saving the persisted index are
/// best-effort and never fail the command.
fn cmd_why(host: Host, prompt: &str, top: usize) -> Result<()> {
    use std::collections::{BTreeSet, HashSet};

    let (mut cfg, file) = Config::load(host);
    let discovery = skill::discover_all(&cfg.roots);
    report_skipped(&discovery.skipped);
    let skills = discovery.skills;
    if skills.is_empty() {
        println!("no skills found in roots: {}", format_roots(&cfg.roots));
        return Ok(());
    }

    let embedder = embed::build(&cfg.model)?;
    // Order matters: calibrate to the embedder first, then re-apply the file's
    // cosine setting so a user pin wins over the calibration.
    cfg.calibrate_to(embedder.as_ref());
    file.apply_cosine(&mut cfg);

    // `why` is the interactive tuning aid, so it reuses the persisted index
    // rather than paying the full embed cost on every call. An unreadable index
    // is treated as absent, and the refreshed index is written back best-effort
    // so the next `why`/hook can reuse it.
    let index_path = paths::index_path(host);
    let cached = match Index::load(&index_path) {
        Ok(found) => found,
        Err(_) => None,
    };
    let idx = index::build(&skills, embedder.as_ref(), cached.as_ref())?;
    let _ = idx.save(&index_path);

    let mut query_vectors = embedder.embed(&[prompt.to_string()], EmbedKind::Query)?;
    let query = query_vectors.remove(0);

    // Same channel inputs the hook builds for a first-turn prompt: the file-type
    // channel from the prompt text, the ambient project channel from the working
    // directory plus code terms in the prompt, and the context-enriched rerank
    // query. There is no conversation history here, so no context-blend vector
    // is passed (`None` below).
    let file_ids = if cfg.file_boost > 0.0 {
        context::file_ids(prompt)
    } else {
        BTreeSet::new()
    };
    let project_ids = if cfg.project_boost > 0.0 {
        let mut terms = match std::env::current_dir() {
            Ok(dir) => context::project_terms(&dir.to_string_lossy()),
            Err(_) => Default::default(),
        };
        terms.extend(context::code_terms(prompt));
        context::skills_for_terms(&terms, &idx)
            .into_keys()
            .collect()
    } else {
        BTreeSet::new()
    };

    let hits = rank::rank_all_ctx(&query, None, &file_ids, &project_ids, prompt, &idx, &cfg);
    let prompt_top = hits
        .iter()
        .map(|hit| hit.cosine)
        .fold(0.0_f32, |best, cosine| best.max(cosine));
    let has_file_ids = !file_ids.is_empty();
    let rerank_query = context::rerank_query(prompt, prompt_top, &[], has_file_ids, &cfg);

    // A confident lone dense winner suppresses the lexical fast-path; kept for
    // the lexical verdict printed at the end.
    let dense_confident = rerank::confident_winner(&hits, &cfg);

    // The decision itself comes from the shared pipeline, so this mirrors the hook.
    let plan = pipeline::decide(&hits, &idx, prompt, &rerank_query, &cfg);

    // What gets a star: gate survivors minus denied ids, capped at `max_skills`.
    // There is no session here, so session dedup is not applied — a star means
    // "would inject on a fresh conversation".
    let mut injectable: HashSet<&str> = HashSet::new();
    for survivor in plan
        .passed
        .iter()
        .filter(|hit| !cfg.deny.contains(&hit.id))
        .take(cfg.max_skills)
    {
        injectable.insert(survivor.id.as_str());
    }

    println!(
        "stage {} threshold {:.2} prompt: {prompt:?}",
        pipeline::stage_label(plan.stage, &idx.model),
        plan.threshold
    );

    for row in plan.rows.iter().take(top) {
        // Channel attribution is taken from the row's own breakdown so no channel
        // contributing to the score is left out. On a reranked row `score` is the
        // logit; the breakdown still lists the stage-1 channels behind it.
        let channels: Vec<String> = row
            .breakdown()
            .iter()
            .map(|(label, v)| format!("{label} {v:.3}"))
            .collect();
        let parts = channels.join(" + ");
        let mark = match injectable.contains(row.id.as_str()) {
            true => "*",
            false => " ",
        };
        println!("{mark} {:<26} score {:.3} ({parts})", row.name, row.score);
    }

    // Lexical (BM25-over-description) detail: the leading scores and whether the
    // dominance gate fires at the active `lexical_min` / `lexical_margin`.
    let lex = lexical::scores(prompt, &idx);
    if let Some(leader) = lex.first() {
        let runner_up = lex.get(1).map_or(0.0, |entry| entry.score);
        let fires = lexical::dominant(prompt, &idx, &cfg).is_some();
        let verdict = if cfg.lexical_min <= 0.0 {
            String::from("off")
        } else if dense_confident {
            String::from("deferred (confident dense winner)")
        } else if fires {
            format!("FIRES -> {}", leader.id)
        } else {
            String::from("no dominant winner")
        };
        println!(
            "\nlexical(BM25): min {:.2} margin {:.2} -> {verdict}",
            cfg.lexical_min, cfg.lexical_margin,
        );
        println!(
            " top gap {:.3} (#1 {:.3} - #2 {:.3})",
            leader.score - runner_up,
            leader.score,
            runner_up
        );
        lex.iter()
            .take(5)
            .for_each(|entry| println!(" {:<26} bm25 {:.3}", entry.id, entry.score));
    }

    Ok(())
}