1use ski::confidence::Stage;
32use ski::config::Config;
33use ski::embed::{self, EmbedKind};
34use ski::hook::Host;
35use ski::rank::Hit;
36use ski::{context, index, pipeline, rank, skill};
37
/// Weight applied to the false-injection rate in the "host-value" score printed by `main`
/// (`recall - FP_HARM * fp_rate`): false positives are discounted rather than counted in full.
const FP_HARM: f32 = 0.15;
45
/// One evaluation row: a prompt plus the skill (if any) expected to be injected for it.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Case {
    /// Id of the skill expected to be injected, or the literal `"(none)"` for a negative case.
    want: String,
    /// Free-form category label shown in reports; `"borderline"` rows are run but not scored.
    kind: String,
    /// The user prompt that is embedded and ranked.
    prompt: String,
    /// Context strings accompanying the prompt (may be empty).
    context: Vec<String>,
    /// Working directory used to derive project terms (may be empty).
    cwd: String,
}
57
58fn parse_cases(raw: &str) -> Vec<Case> {
59 raw.lines()
60 .filter(|l| !l.trim().is_empty() && !l.trim_start().starts_with('#'))
61 .filter_map(|l| {
62 let mut it = l.splitn(5, '\t');
63 let want = it.next()?.trim().to_string();
64 let kind = it.next()?.trim().to_string();
65 let prompt = it.next()?.trim().to_string();
66 let context = it
67 .next()
68 .map(|c| {
69 c.split('|')
70 .map(|p| p.trim().to_string())
71 .filter(|p| !p.is_empty())
72 .collect()
73 })
74 .unwrap_or_default();
75 let cwd = it.next().map(|c| c.trim().to_string()).unwrap_or_default();
76 if prompt.is_empty() {
77 return None;
78 }
79 Some(Case {
80 want,
81 kind,
82 prompt,
83 context,
84 cwd,
85 })
86 })
87 .collect()
88}
89
90fn main() -> anyhow::Result<()> {
91 let args: Vec<String> = std::env::args().skip(1).collect();
92 let verbose = args.iter().any(|a| a == "-v" || a == "--verbose");
93 let path = args
94 .iter()
95 .find(|a| !a.starts_with('-'))
96 .cloned()
97 .unwrap_or_else(|| "tests/data/popular_skills_prompts.tsv".to_string());
98
99 let raw = std::fs::read_to_string(&path)?;
100 let cases = parse_cases(&raw);
101
102 let (mut cfg, file) = Config::load(Host::Claude);
103 if let Ok(v) = std::env::var("SKI_PHRASE_BOOST") {
106 cfg.phrase_boost = v.parse().expect("SKI_PHRASE_BOOST must be a float");
107 }
108 if let Ok(v) = std::env::var("SKI_CONTEXT_DEPTH") {
112 cfg.context_depth = v.parse().expect("SKI_CONTEXT_DEPTH must be a usize");
113 }
114 if let Ok(v) = std::env::var("SKI_CONTEXT_WEIGHT") {
115 cfg.context_weight = v.parse().expect("SKI_CONTEXT_WEIGHT must be a float");
116 }
117 if let Ok(v) = std::env::var("SKI_VAGUE_LO") {
118 cfg.vague_lo = v.parse().expect("SKI_VAGUE_LO must be a float");
119 }
120 if let Ok(v) = std::env::var("SKI_VAGUE_HI") {
121 cfg.vague_hi = v.parse().expect("SKI_VAGUE_HI must be a float");
122 }
123 if let Ok(v) = std::env::var("SKI_FILE_BOOST") {
124 cfg.file_boost = v.parse().expect("SKI_FILE_BOOST must be a float");
125 }
126 if let Ok(v) = std::env::var("SKI_PROJECT_BOOST") {
127 cfg.project_boost = v.parse().expect("SKI_PROJECT_BOOST must be a float");
128 }
129 if let Ok(v) = std::env::var("SKI_RERANK_MIN") {
133 cfg.rerank_min = v.parse().expect("SKI_RERANK_MIN must be a float");
134 }
135 if let Ok(v) = std::env::var("SKI_RERANK_MARGIN") {
136 cfg.rerank_margin = v.parse().expect("SKI_RERANK_MARGIN must be a float");
137 }
138 if let Ok(v) = std::env::var("SKI_LEXICAL_MIN") {
141 cfg.lexical_min = v.parse().expect("SKI_LEXICAL_MIN must be a float");
142 }
143 if let Ok(v) = std::env::var("SKI_LEXICAL_MARGIN") {
144 cfg.lexical_margin = v.parse().expect("SKI_LEXICAL_MARGIN must be a float");
145 }
146 let skills = skill::discover(&cfg.roots)?;
147 let embedder = embed::build(&cfg.model)?;
148 cfg.calibrate_to(embedder.as_ref());
149 file.apply_cosine(&mut cfg);
150 let idx = index::build(&skills, embedder.as_ref(), None)?;
151 eprintln!(
152 "index: {} skills via {} | rerank_min {:.2} margin {:.2} | min_sim {:.2} | lexical_min {:.2} margin {:.2}",
153 idx.skills.len(),
154 idx.model,
155 cfg.rerank_min,
156 cfg.rerank_margin,
157 cfg.min_similarity,
158 cfg.lexical_min,
159 cfg.lexical_margin,
160 );
161
162 let (mut tp, mut fn_, mut fp, mut tn) = (0u32, 0u32, 0u32, 0u32);
164 let (mut n_pos, mut n_neg) = (0u32, 0u32);
165 let mut fp_rows: Vec<String> = Vec::new();
166 let mut fn_rows: Vec<String> = Vec::new();
167 let (mut recall_at_k, mut stage1_top1) = (0u32, 0u32);
173 let mut recall_miss_rows: Vec<String> = Vec::new();
174
175 for c in &cases {
176 let query = embedder
177 .embed(std::slice::from_ref(&c.prompt), EmbedKind::Query)?
178 .remove(0);
179 let cvec = context::vector(embedder.as_ref(), &c.context, &cfg)?;
180 let file_text = format!("{} {}", c.context.join(" "), c.prompt);
183 let file_ids = context::file_ids(&file_text);
184 let project_ids = if cfg.project_boost > 0.0 {
188 let mut terms = context::project_terms(&c.cwd);
189 terms.extend(context::code_terms(&file_text));
190 context::skills_for_terms(&terms, &idx)
191 .into_keys()
192 .collect()
193 } else {
194 std::collections::BTreeSet::new()
195 };
196 let hits = rank::rank_all_ctx(
197 &query,
198 cvec.as_deref(),
199 &file_ids,
200 &project_ids,
201 &c.prompt,
202 &idx,
203 &cfg,
204 );
205 let prompt_top = hits.iter().map(|h| h.cosine).fold(0.0_f32, f32::max);
208 let rerank_query = context::rerank_query(
209 &c.prompt,
210 prompt_top,
211 &c.context,
212 !file_ids.is_empty(),
213 &cfg,
214 );
215 let plan = pipeline::decide(&hits, &idx, &c.prompt, &rerank_query, &cfg);
216 let stage = match plan.stage {
217 Stage::Lexical => "lexical",
218 Stage::Rerank => "rerank",
219 Stage::Cosine => "stage1",
220 };
221 let injected: Vec<Hit> = plan
224 .passed
225 .into_iter()
226 .filter(|h| !cfg.deny.contains(&h.id))
227 .take(cfg.max_skills)
228 .collect();
229 let ids: Vec<String> = injected.iter().map(|h| h.id.clone()).collect();
230 let is_neg = c.want == "(none)";
231 let observe_only = c.kind == "borderline";
232
233 if verbose {
234 let top: Vec<String> = hits
235 .iter()
236 .take(4)
237 .map(|h| format!("{}={:.3}", h.id, h.score))
238 .collect();
239 let inj: Vec<String> = injected
240 .iter()
241 .map(|h| {
242 format!(
243 "{}=L{:.2}/cos{:.3}+ctx{:.2}+file{:.2}+proj{:.2}+kw{:.2}+ph{:.2}",
244 h.id, h.score, h.cosine, h.context, h.file, h.project, h.keyword, h.phrase
245 )
246 })
247 .collect();
248 eprintln!(
249 "[{:<10}] {:<7} inject=[{}] top: {} :: {}",
250 c.kind,
251 stage,
252 inj.join(", "),
253 top.join(", "),
254 c.prompt,
255 );
256 }
257
258 if observe_only {
259 continue;
260 }
261 if is_neg {
262 n_neg += 1;
263 if injected.is_empty() {
264 tn += 1;
265 } else {
266 fp += 1;
267 fp_rows.push(format!(
268 " FP [{:<10}] inject=[{}] :: {}",
269 c.kind,
270 ids.join(", "),
271 c.prompt
272 ));
273 }
274 } else {
275 n_pos += 1;
276 let rank = hits.iter().position(|h| h.id == c.want);
279 if rank == Some(0) {
280 stage1_top1 += 1;
281 }
282 if rank.is_some_and(|r| r < cfg.rerank_top_k) {
283 recall_at_k += 1;
284 } else {
285 recall_miss_rows.push(format!(
286 " R@k MISS [{:<10}] want={} stage-1 rank={} :: {}",
287 c.kind,
288 c.want,
289 rank.map_or_else(|| "absent".to_string(), |r| r.to_string()),
290 c.prompt
291 ));
292 }
293 if ids.iter().any(|id| id == &c.want) {
294 tp += 1;
295 } else {
296 fn_ += 1;
297 fn_rows.push(format!(
298 " FN [{:<10}] want={} got=[{}] :: {}",
299 c.kind,
300 c.want,
301 ids.join(", "),
302 c.prompt
303 ));
304 }
305 }
306 }
307
308 println!("\n=== eval: {} ===", path);
309 println!(
310 "positives {n_pos}: recall {tp}/{n_pos} ({:.0}%) misses {fn_}",
311 pct(tp, n_pos)
312 );
313 println!(
314 "negatives {n_neg}: false-inject {fp}/{n_neg} ({:.0}%) clean {tn}",
315 pct(fp, n_neg)
316 );
317 let recall_rate = if n_pos == 0 {
320 0.0
321 } else {
322 tp as f32 / n_pos as f32
323 };
324 let fp_rate = if n_neg == 0 {
325 0.0
326 } else {
327 fp as f32 / n_neg as f32
328 };
329 println!(
330 "host-value {:.0}% (= recall {:.0}% - {FP_HARM} * fp {:.0}%; FP discounted: a strong host ignores false injects)",
331 100.0 * (recall_rate - FP_HARM * fp_rate),
332 100.0 * recall_rate,
333 100.0 * fp_rate,
334 );
335 println!(
336 "stage-1 (pre-rerank, k={}): recall@k {recall_at_k}/{n_pos} ({:.0}%) top-1 {stage1_top1}/{n_pos} ({:.0}%)",
337 cfg.rerank_top_k,
338 pct(recall_at_k, n_pos),
339 pct(stage1_top1, n_pos),
340 );
341 if !recall_miss_rows.is_empty() {
342 println!(
343 "--- stage-1 recall@k misses (gold below top-{}) ---",
344 cfg.rerank_top_k
345 );
346 recall_miss_rows.iter().for_each(|r| println!("{r}"));
347 }
348 if !fn_rows.is_empty() {
349 println!("--- recall misses ---");
350 fn_rows.iter().for_each(|r| println!("{r}"));
351 }
352 if !fp_rows.is_empty() {
353 println!("--- false injections ---");
354 fp_rows.iter().for_each(|r| println!("{r}"));
355 }
356 Ok(())
357}
358
/// Returns `n` as a percentage of `d` (0.0–100.0 when `n <= d`), or `0.0` when `d` is zero.
fn pct(n: u32, d: u32) -> f32 {
    match d {
        // An empty denominator reports 0% instead of NaN/inf.
        0 => 0.0,
        total => {
            let scaled = 100.0 * n as f32;
            scaled / total as f32
        }
    }
}