Skip to main content

code_terms

Function code_terms 

Source
pub fn code_terms(text: &str) -> Vec<String>
Expand description

Returns the ecosystem terms implied by the code files referenced in `text` (a prompt and/or recent-window turns), derived via [`ext_terms`]. The result is order-preserving and de-duplicated.

Examples found in repository?
examples/eval.rs (line 189)
/// Evaluation entry point: scores a corpus of prompt cases against the skill
/// index and prints a confusion report to stdout.
///
/// Command line (after the program name):
/// - `-v` / `--verbose`: print a per-case trace to stderr.
/// - the first argument not starting with `-`: path to the cases file
///   (defaults to `tests/data/popular_skills_prompts.tsv`).
///
/// Environment: each `SKI_*` variable read below overrides one `cfg` field for
/// this run only.
///
/// # Errors
/// Propagates failures from reading the cases file, skill discovery, embedder
/// and index construction, and per-case embedding / context-vector computation.
///
/// # Panics
/// Panics (via `expect`) when a `SKI_*` variable is set but does not parse into
/// its field's type. `pct` and `FP_HARM` are defined outside this excerpt.
90fn main() -> anyhow::Result<()> {
91    let args: Vec<String> = std::env::args().skip(1).collect();
92    let verbose = args.iter().any(|a| a == "-v" || a == "--verbose");
93    let path = args
94        .iter()
95        .find(|a| !a.starts_with('-'))
96        .cloned()
97        .unwrap_or_else(|| "tests/data/popular_skills_prompts.tsv".to_string());
98
99    let raw = std::fs::read_to_string(&path)?;
100    let cases = parse_cases(&raw);
101
102    let (mut cfg, file) = Config::load(Host::Claude);
       // `env::var` is `Err` when a variable is unset or not valid Unicode; in
       // either case the override is skipped and the loaded value stands.
103    // A/B affordance: override the phrase-channel boost (0.0 disables it) so the
104    // same corpus can be scored with and without the channel in one rebuild.
105    if let Ok(v) = std::env::var("SKI_PHRASE_BOOST") {
106        cfg.phrase_boost = v.parse().expect("SKI_PHRASE_BOOST must be a float");
107    }
108    // Context enrichment (Goal 3) is off by default; these env knobs activate and
109    // tune it for one run, mirroring SKI_PHRASE_BOOST, so the same corpus can be
110    // scored with and without conversational context.
111    if let Ok(v) = std::env::var("SKI_CONTEXT_DEPTH") {
112        cfg.context_depth = v.parse().expect("SKI_CONTEXT_DEPTH must be a usize");
113    }
114    if let Ok(v) = std::env::var("SKI_CONTEXT_WEIGHT") {
115        cfg.context_weight = v.parse().expect("SKI_CONTEXT_WEIGHT must be a float");
116    }
117    if let Ok(v) = std::env::var("SKI_VAGUE_LO") {
118        cfg.vague_lo = v.parse().expect("SKI_VAGUE_LO must be a float");
119    }
120    if let Ok(v) = std::env::var("SKI_VAGUE_HI") {
121        cfg.vague_hi = v.parse().expect("SKI_VAGUE_HI must be a float");
122    }
123    if let Ok(v) = std::env::var("SKI_FILE_BOOST") {
124        cfg.file_boost = v.parse().expect("SKI_FILE_BOOST must be a float");
125    }
126    if let Ok(v) = std::env::var("SKI_PROJECT_BOOST") {
127        cfg.project_boost = v.parse().expect("SKI_PROJECT_BOOST must be a float");
128    }
129    // Reranker-gate sweep knobs: tune the stage-2 abstention floor/margin for one
130    // run without editing config.toml (these are on the logit scale, untouched by
131    // `calibrate_to`).
132    if let Ok(v) = std::env::var("SKI_RERANK_MIN") {
133        cfg.rerank_min = v.parse().expect("SKI_RERANK_MIN must be a float");
134    }
135    if let Ok(v) = std::env::var("SKI_RERANK_MARGIN") {
136        cfg.rerank_margin = v.parse().expect("SKI_RERANK_MARGIN must be a float");
137    }
138    // Lexical fast-path (BM25 over description) sweep knobs: `lexical_min <= 0`
139    // disables it, so the same corpus can be scored with and without the channel.
140    if let Ok(v) = std::env::var("SKI_LEXICAL_MIN") {
141        cfg.lexical_min = v.parse().expect("SKI_LEXICAL_MIN must be a float");
142    }
143    if let Ok(v) = std::env::var("SKI_LEXICAL_MARGIN") {
144        cfg.lexical_margin = v.parse().expect("SKI_LEXICAL_MARGIN must be a float");
145    }
146    let skills = skill::discover(&cfg.roots)?;
147    let embedder = embed::build(&cfg.model)?;
       // NOTE(review): `apply_cosine` runs after `calibrate_to`; presumably so the
       // file-provided cosine settings survive calibration — confirm against `Config`.
148    cfg.calibrate_to(embedder.as_ref());
149    file.apply_cosine(&mut cfg);
150    let idx = index::build(&skills, embedder.as_ref(), None)?;
151    eprintln!(
152        "index: {} skills via {} | rerank_min {:.2} margin {:.2} | min_sim {:.2} | lexical_min {:.2} margin {:.2}",
153        idx.skills.len(),
154        idx.model,
155        cfg.rerank_min,
156        cfg.rerank_margin,
157        cfg.min_similarity,
158        cfg.lexical_min,
159        cfg.lexical_margin,
160    );
161
162    // Confusion counters. `borderline` rows are tallied separately (observe-only).
163    let (mut tp, mut fn_, mut fp, mut tn) = (0u32, 0u32, 0u32, 0u32);
164    let (mut n_pos, mut n_neg) = (0u32, 0u32);
165    let mut fp_rows: Vec<String> = Vec::new();
166    let mut fn_rows: Vec<String> = Vec::new();
167    // Stage-1 retrieval ceiling (pre-rerank), over positives only: recall@k is the
168    // fraction whose gold skill survives into the top-`rerank_top_k` candidates the
169    // reranker is fed (`rerank::rerank` takes exactly that many); top-1 is the
170    // fraction already ranked first by hybrid score. recall@k ~100% means retrieval
171    // is not the bottleneck and the problem is ranking within the retrieved set.
172    let (mut recall_at_k, mut stage1_top1) = (0u32, 0u32);
173    let mut recall_miss_rows: Vec<String> = Vec::new();
174
175    for c in &cases {
           // A single prompt is embedded, so the first returned vector is taken.
           // `remove(0)` panics on an empty Vec — presumably `embed` returns one
           // vector per input (TODO confirm).
176        let query = embedder
177            .embed(std::slice::from_ref(&c.prompt), EmbedKind::Query)?
178            .remove(0);
179        let cvec = context::vector(embedder.as_ref(), &c.context, &cfg)?;
180        // File-type channel: scan this turn's prompt AND its prior context for named
181        // files (a `.xlsx` etc.), mapping each to its skill.
182        let file_text = format!("{} {}", c.context.join(" "), c.prompt);
183        let file_ids = context::file_ids(&file_text);
184        // Ambient project-type channel: the case's cwd (5th column) yields
185        // ecosystem terms (plus any code file named in the conversation), resolved
186        // against the installed index. Empty when the channel is off.
187        let project_ids = if cfg.project_boost > 0.0 {
188            let mut terms = context::project_terms(&c.cwd);
189            terms.extend(context::code_terms(&file_text));
190            context::skills_for_terms(&terms, &idx)
191                .into_keys()
192                .collect()
193        } else {
194            std::collections::BTreeSet::new()
195        };
196        let hits = rank::rank_all_ctx(
197            &query,
198            cvec.as_deref(),
199            &file_ids,
200            &project_ids,
201            &c.prompt,
202            &idx,
203            &cfg,
204        );
205        // The reranker reads text: enrich its query with the recent window when the
206        // prompt is vague (same gate that lets the context vector contribute).
           // Highest cosine across all hits; the fold's 0.0 seed is both the floor
           // and the value used when `hits` is empty.
207        let prompt_top = hits.iter().map(|h| h.cosine).fold(0.0_f32, f32::max);
208        let rerank_query = context::rerank_query(
209            &c.prompt,
210            prompt_top,
211            &c.context,
212            !file_ids.is_empty(),
213            &cfg,
214        );
215        let plan = pipeline::decide(&hits, &idx, &c.prompt, &rerank_query, &cfg);
216        let stage = match plan.stage {
217            Stage::Lexical => "lexical",
218            Stage::Rerank => "rerank",
219            Stage::Cosine => "stage1",
220        };
221        // Caller-side guardrails: the hook's `finalize` minus session dedup (the eval
222        // has no session) — drop denied skills, cap at `max_skills`.
223        let injected: Vec<Hit> = plan
224            .passed
225            .into_iter()
226            .filter(|h| !cfg.deny.contains(&h.id))
227            .take(cfg.max_skills)
228            .collect();
229        let ids: Vec<String> = injected.iter().map(|h| h.id.clone()).collect();
           // A `want` of "(none)" marks a negative case (nothing should be injected).
           // `borderline` rows are still traced under `--verbose` below but are
           // skipped before any counter is touched.
230        let is_neg = c.want == "(none)";
231        let observe_only = c.kind == "borderline";
232
233        if verbose {
234            let top: Vec<String> = hits
235                .iter()
236                .take(4)
237                .map(|h| format!("{}={:.3}", h.id, h.score))
238                .collect();
239            let inj: Vec<String> = injected
240                .iter()
241                .map(|h| {
242                    format!(
243                        "{}=L{:.2}/cos{:.3}+ctx{:.2}+file{:.2}+proj{:.2}+kw{:.2}+ph{:.2}",
244                        h.id, h.score, h.cosine, h.context, h.file, h.project, h.keyword, h.phrase
245                    )
246                })
247                .collect();
248            eprintln!(
249                "[{:<10}] {:<7} inject=[{}]  top: {}  :: {}",
250                c.kind,
251                stage,
252                inj.join(", "),
253                top.join(", "),
254                c.prompt,
255            );
256        }
257
258        if observe_only {
259            continue;
260        }
261        if is_neg {
262            n_neg += 1;
263            if injected.is_empty() {
264                tn += 1;
265            } else {
266                fp += 1;
267                fp_rows.push(format!(
268                    "  FP [{:<10}] inject=[{}] :: {}",
269                    c.kind,
270                    ids.join(", "),
271                    c.prompt
272                ));
273            }
274        } else {
275            n_pos += 1;
276            // Stage-1 ceiling: where does the gold skill land in the full hybrid
277            // ranking, before any rerank/threshold gating?
               // `position` is 0-based: `Some(0)` means top-1, and the rank printed
               // in the miss rows below is 0-based as well.
278            let rank = hits.iter().position(|h| h.id == c.want);
279            if rank == Some(0) {
280                stage1_top1 += 1;
281            }
282            if rank.is_some_and(|r| r < cfg.rerank_top_k) {
283                recall_at_k += 1;
284            } else {
285                recall_miss_rows.push(format!(
286                    "  R@k MISS [{:<10}] want={} stage-1 rank={} :: {}",
287                    c.kind,
288                    c.want,
289                    rank.map_or_else(|| "absent".to_string(), |r| r.to_string()),
290                    c.prompt
291                ));
292            }
               // A positive counts as recalled when the gold skill is anywhere among
               // the injected ids, not only when it is first.
293            if ids.iter().any(|id| id == &c.want) {
294                tp += 1;
295            } else {
296                fn_ += 1;
297                fn_rows.push(format!(
298                    "  FN [{:<10}] want={} got=[{}] :: {}",
299                    c.kind,
300                    c.want,
301                    ids.join(", "),
302                    c.prompt
303                ));
304            }
305        }
306    }
307
308    println!("\n=== eval: {} ===", path);
309    println!(
310        "positives {n_pos}: recall {tp}/{n_pos} ({:.0}%)   misses {fn_}",
311        pct(tp, n_pos)
312    );
313    println!(
314        "negatives {n_neg}: false-inject {fp}/{n_neg} ({:.0}%)   clean {tn}",
315        pct(fp, n_neg)
316    );
317    // Headline: recall recovered, net of discounted FP harm. Optimise this — not
318    // FP count — because a strong host filters false injects (see module docs).
       // Both rates fall back to 0.0 for an empty class so the headline never
       // divides zero by zero (which would print NaN).
319    let recall_rate = if n_pos == 0 {
320        0.0
321    } else {
322        tp as f32 / n_pos as f32
323    };
324    let fp_rate = if n_neg == 0 {
325        0.0
326    } else {
327        fp as f32 / n_neg as f32
328    };
329    println!(
330        "host-value {:.0}%  (= recall {:.0}% - {FP_HARM} * fp {:.0}%; FP discounted: a strong host ignores false injects)",
331        100.0 * (recall_rate - FP_HARM * fp_rate),
332        100.0 * recall_rate,
333        100.0 * fp_rate,
334    );
335    println!(
336        "stage-1 (pre-rerank, k={}): recall@k {recall_at_k}/{n_pos} ({:.0}%)   top-1 {stage1_top1}/{n_pos} ({:.0}%)",
337        cfg.rerank_top_k,
338        pct(recall_at_k, n_pos),
339        pct(stage1_top1, n_pos),
340    );
       // Detail sections are printed only when they have rows.
341    if !recall_miss_rows.is_empty() {
342        println!(
343            "--- stage-1 recall@k misses (gold below top-{}) ---",
344            cfg.rerank_top_k
345        );
346        recall_miss_rows.iter().for_each(|r| println!("{r}"));
347    }
348    if !fn_rows.is_empty() {
349        println!("--- recall misses ---");
350        fn_rows.iter().for_each(|r| println!("{r}"));
351    }
352    if !fp_rows.is_empty() {
353        println!("--- false injections ---");
354        fp_rows.iter().for_each(|r| println!("{r}"));
355    }
356    Ok(())
357}