Skip to main content

ski/
suggest.rs

1//! `ski suggest` — turn the telemetry log into concrete, copy-pasteable tuning
2//! actions. `ski history` *shows* the recall misses and false positives; this
3//! closes the loop by saying what to do about them:
4//!
5//! - **Recall misses** (the model self-loaded a skill ski stayed silent on,
6//!   repeatedly): suggest `force = ["<skill>"]` when one of the skill's existing
7//!   keywords already appears in the missed prompts (so `force` would have fired
8//!   as-is), and/or suggest new `keywords:` for its `SKILL.md` mined from the
9//!   recurring content tokens of the missed prompts.
10//! - **Repeat false positives** (ski injected it across several sessions and the
11//!   model never once used it): suggest `deny = ["<skill>"]` — the config key the
12//!   README calls the most-reached-for one.
13//!
14//! Everything here is *suggestion*, never mutation: ski does not edit the user's
15//! config or SKILL.md files. Analysis is pure ([`analyze`]) and unit-testable
16//! without IO; only [`run`] touches the filesystem. Read-only over the same JSONL
17//! `ski history` reads, and equally tolerant of malformed lines.
18
19use crate::history::{self, Verdict};
20use crate::index::Index;
21use crate::text::{match_tokens, norm_token};
22use std::collections::{BTreeMap, BTreeSet};
23
/// Minimum number of self-loads of one skill before a suggestion is emitted for
/// it. A lone miss may be noise; a second miss of the same skill is treated as a
/// pattern. Skills below the bar are not dropped: they are reported compactly on
/// the watch list so a growing log shows them accumulating.
const MIN_MISS_EVIDENCE: usize = 2;

/// Minimum number of sessions in which a skill was injected but not used before
/// a `deny` is suggested; the skill must additionally have zero uses in *any*
/// session. The bar sits above the recall-side one because deny silences a skill
/// outright, and the project prefers to err toward surfacing.
const MIN_DENY_SESSIONS: u64 = 3;

/// Upper bound on mined keyword candidates proposed per skill.
const MAX_KEYWORDS: usize = 3;

/// Upper bound on sample prompts retained per missed skill (display only).
const MAX_PROMPTS: usize = 3;
41
/// One recall gap: a skill the model repeatedly loaded by itself while ski
/// stayed silent, together with the action(s) that would close that gap.
#[derive(Debug, PartialEq)]
pub struct Miss {
    /// Identifier of the missed skill.
    pub skill: String,
    /// Number of self-loads on which ski abstained or never surfaced the skill.
    /// Equals `near_miss + buried + absent`.
    pub occurrences: usize,
    /// Self-loads where ski had ranked the skill near the top. Together with
    /// `buried` and `absent` this shows whether the gap lies in the gate or in
    /// retrieval.
    pub near_miss: usize,
    /// Self-loads where ski had ranked the skill deep in the list.
    pub buried: usize,
    /// Self-loads where ski never retrieved the skill at all.
    pub absent: usize,
    /// Distinct sample prompts the misses occurred on, capped at
    /// [`MAX_PROMPTS`].
    pub prompts: Vec<String>,
    /// `true` when one of the skill's *existing* keywords already occurs in a
    /// missed prompt, i.e. `force = ["<skill>"]` would have fired as-is.
    pub force_ready: bool,
    /// Mined keyword candidates: content tokens that recur across the missed
    /// prompts and are not already carried by the skill's keywords or
    /// description. Empty when the prompts share no vocabulary.
    pub keywords: Vec<String>,
}
65
/// One precision gap: a skill ski keeps injecting although the model never
/// uses it.
#[derive(Debug, PartialEq)]
pub struct Deny {
    /// Identifier of the unused skill.
    pub skill: String,
    /// Number of sessions in which the skill was injected and not used.
    pub fp_sessions: u64,
}
73
74#[derive(Debug, Default, PartialEq)]
75pub struct Suggestions {
76    /// Recall-side actions, most-frequent miss first.
77    pub misses: Vec<Miss>,
78    /// Precision-side actions, most-frequent false positive first.
79    pub denies: Vec<Deny>,
80    /// Skills missed exactly once — no suggestion yet, listed so a pattern can be
81    /// watched as the log grows.
82    pub watch: Vec<String>,
83}
84
85/// Analyze a telemetry log against the (optional) index. The index supplies each
86/// skill's existing keywords/description so suggestions don't propose what is
87/// already there and can tell whether `force` is ready to fire; without it (no
88/// index built yet) keyword mining still works, just unfiltered.
89pub fn analyze(log: &str, idx: Option<&Index>) -> Suggestions {
90    let mut out = Suggestions::default();
91
92    // ---- Recall side: native picks ski abstained on or never surfaced. ----
93    struct Acc {
94        near: usize,
95        buried: usize,
96        absent: usize,
97        prompts: Vec<String>,
98    }
99    let mut by_skill: BTreeMap<String, Acc> = BTreeMap::new();
100    for row in history::compare(log) {
101        let (near, buried, absent) = match row.verdict {
102            Verdict::NearMiss { .. } => (1, 0, 0),
103            Verdict::Buried { .. } => (0, 1, 0),
104            Verdict::Absent => (0, 0, 1),
105            // Agreed proves nothing to fix; NoRanking can't be placed.
106            Verdict::Agreed | Verdict::NoRanking => continue,
107        };
108        let acc = by_skill.entry(row.native).or_insert(Acc {
109            near: 0,
110            buried: 0,
111            absent: 0,
112            prompts: Vec::new(),
113        });
114        acc.near += near;
115        acc.buried += buried;
116        acc.absent += absent;
117        if !row.prompt.is_empty() && !acc.prompts.contains(&row.prompt) {
118            acc.prompts.push(row.prompt);
119        }
120    }
121    for (skill, acc) in by_skill {
122        let occurrences = acc.near + acc.buried + acc.absent;
123        if occurrences < MIN_MISS_EVIDENCE {
124            out.watch.push(skill);
125            continue;
126        }
127        let entry = idx.and_then(|i| i.get(&skill));
128        // Tokens the skill already carries (keywords include its name tokens).
129        let known: BTreeSet<String> = entry
130            .map(|e| {
131                let mut t: BTreeSet<String> = e
132                    .keywords
133                    .iter()
134                    .flat_map(|k| match_tokens(k))
135                    .map(|t| norm_token(&t))
136                    .collect();
137                t.extend(match_tokens(&e.description));
138                t
139            })
140            .unwrap_or_default();
141        // An existing keyword hitting any missed prompt means `force` fires as-is.
142        let force_ready = entry.is_some_and(|e| {
143            acc.prompts.iter().any(|p| {
144                let toks: BTreeSet<String> =
145                    match_tokens(p).iter().map(|t| norm_token(t)).collect();
146                e.keywords.iter().any(|k| toks.contains(&norm_token(k)))
147            })
148        });
149        let keywords = mine_keywords(&acc.prompts, &known);
150        let mut prompts = acc.prompts;
151        prompts.truncate(MAX_PROMPTS);
152        out.misses.push(Miss {
153            skill,
154            occurrences,
155            near_miss: acc.near,
156            buried: acc.buried,
157            absent: acc.absent,
158            prompts,
159            force_ready,
160            keywords,
161        });
162    }
163    out.misses.sort_by(|a, b| {
164        b.occurrences
165            .cmp(&a.occurrences)
166            .then(a.skill.cmp(&b.skill))
167    });
168
169    // ---- Precision side: injected across sessions, never once used. ----
170    let recd = history::recommended_by_session(log);
171    let used = history::used_by_session(log);
172    let mut fp_sessions: BTreeMap<String, u64> = BTreeMap::new();
173    let mut ever_used: BTreeSet<&String> = BTreeSet::new();
174    for (session, skills) in &recd {
175        for skill in skills {
176            if used.get(session).is_some_and(|u| u.contains(skill)) {
177                ever_used.insert(skill);
178            } else {
179                *fp_sessions.entry(skill.clone()).or_default() += 1;
180            }
181        }
182    }
183    for skills in used.values() {
184        ever_used.extend(skills.iter());
185    }
186    for (skill, n) in fp_sessions {
187        if n >= MIN_DENY_SESSIONS && !ever_used.contains(&skill) {
188            out.denies.push(Deny {
189                skill,
190                fp_sessions: n,
191            });
192        }
193    }
194    out.denies.sort_by(|a, b| {
195        b.fp_sessions
196            .cmp(&a.fp_sessions)
197            .then(a.skill.cmp(&b.skill))
198    });
199    out
200}
201
202/// Keyword candidates from a skill's missed prompts: content tokens recurring in
203/// at least two prompts (or all tokens when there is only one prompt would be too
204/// noisy, so a single prompt yields nothing), minus what the skill already
205/// carries. Most-recurrent first, capped at [`MAX_KEYWORDS`].
206fn mine_keywords(prompts: &[String], known: &BTreeSet<String>) -> Vec<String> {
207    if prompts.len() < 2 {
208        return Vec::new();
209    }
210    let mut counts: BTreeMap<String, usize> = BTreeMap::new();
211    for p in prompts {
212        let toks: BTreeSet<String> = match_tokens(p).into_iter().collect();
213        for t in toks {
214            *counts.entry(t).or_default() += 1;
215        }
216    }
217    let mut cands: Vec<(String, usize)> = counts
218        .into_iter()
219        .filter(|(t, n)| *n >= 2 && !known.contains(t))
220        .collect();
221    cands.sort_by(|a, b| b.1.cmp(&a.1).then(a.0.cmp(&b.0)));
222    cands
223        .into_iter()
224        .take(MAX_KEYWORDS)
225        .map(|(t, _)| t)
226        .collect()
227}
228
229/// `ski suggest`: analyze the telemetry log for `host` and print actions.
230pub fn run(host: crate::hook::Host) -> anyhow::Result<()> {
231    let path = crate::paths::telemetry_path();
232    let Ok(log) = std::fs::read_to_string(&path) else {
233        println!(
234            "no telemetry log at {} (enable with SKI_TELEMETRY=1, or telemetry = true in config.toml)",
235            path.display()
236        );
237        return Ok(());
238    };
239    // Best-effort: suggestions degrade gracefully (unfiltered keyword mining, no
240    // force-readiness check) when no index has been built yet.
241    let idx = Index::load(&crate::paths::index_path(host)).ok().flatten();
242    print_suggestions(&analyze(&log, idx.as_ref()));
243    Ok(())
244}
245
246fn print_suggestions(s: &Suggestions) {
247    if s.misses.is_empty() && s.denies.is_empty() && s.watch.is_empty() {
248        println!("nothing to suggest: no repeated recall misses or unused injections in the log.");
249        return;
250    }
251    if !s.misses.is_empty() {
252        println!("recall misses — the model loaded these itself while ski stayed silent:\n");
253        for m in &s.misses {
254            println!(
255                "  {}  ×{} self-loads (ski: near-miss ×{}, buried ×{}, never surfaced ×{})",
256                m.skill, m.occurrences, m.near_miss, m.buried, m.absent
257            );
258            for p in &m.prompts {
259                println!("    prompt: {}", crate::history::truncate(p, 100));
260            }
261            if m.force_ready {
262                println!(
263                    "    -> config.toml:  force = [\"{}\"]   (an existing keyword already hits these prompts)",
264                    m.skill
265                );
266            }
267            if !m.keywords.is_empty() {
268                println!(
269                    "    -> its SKILL.md: add keywords: [{}]{}",
270                    m.keywords.join(", "),
271                    if m.force_ready {
272                        ""
273                    } else {
274                        "   (then force = [...] becomes effective too)"
275                    }
276                );
277            }
278            if !m.force_ready && m.keywords.is_empty() {
279                println!(
280                    "    -> the missed prompts share no vocabulary; consider expanding the skill's description"
281                );
282            }
283            println!();
284        }
285    }
286    if !s.denies.is_empty() {
287        println!("repeat false positives — injected across sessions, never once used:\n");
288        for d in &s.denies {
289            println!("  {}  unused in {} sessions", d.skill, d.fp_sessions);
290            println!("    -> config.toml:  deny = [\"{}\"]", d.skill);
291            println!();
292        }
293    }
294    if !s.watch.is_empty() {
295        println!(
296            "watching (one miss each, no suggestion yet): {}",
297            s.watch.join(", ")
298        );
299    }
300}
301
#[cfg(test)]
mod tests {
    use super::*;
    use crate::index::Entry;

    /// Build an index entry whose name equals its id and whose remaining
    /// fields are empty.
    fn skill(id: &str, description: &str, keywords: &[&str]) -> Entry {
        Entry {
            id: id.to_string(),
            name: id.to_string(),
            description: description.to_string(),
            path: String::new(),
            keywords: keywords.iter().copied().map(String::from).collect(),
            trigger_phrases: Vec::new(),
            body_head: String::new(),
            hash: String::new(),
            embedding: Vec::new(),
        }
    }

    /// Two-skill index: `uv-development` with keywords, `pickup` without.
    fn sample_index() -> Index {
        Index {
            model: "test".into(),
            dim: 0,
            skills: vec![
                skill(
                    "uv-development",
                    "Manage Python projects with uv.",
                    &["uv", "python"],
                ),
                skill("pickup", "Resume a handoff.", &[]),
            ],
        }
    }

    /// Whether `list` contains `word` exactly.
    fn has(list: &[String], word: &str) -> bool {
        list.iter().any(|item| item == word)
    }

    // uv-development: self-loaded twice on prompts ski abstained on (one near-miss,
    // one absent). One of the prompts contains its existing keyword "uv" -> force
    // is ready. Both prompts share "dependency"/"lockfile" -> mined keywords.
    // pickup: injected in 3 sessions, never used -> deny candidate.
    // handoff: missed once -> watch list only.
    const LOG: &str = r#"
{"ts":1000,"kind":"recommend","session":"s1","stage":"rerank","prompt":"bump the dependency in the uv lockfile","considered":[{"id":"uv-development","score":-1.9}],"candidates":[],"injected":[],"abstained":"below_gate"}
{"ts":1100,"kind":"use","session":"s1","skill":"uv-development","via":"skill","prompt":"bump the dependency in the uv lockfile"}
{"ts":2000,"kind":"recommend","session":"s2","stage":"rerank","prompt":"pin that dependency in the lockfile","considered":[{"id":"other","score":-2.0}],"candidates":[],"injected":[],"abstained":"below_gate"}
{"ts":2100,"kind":"use","session":"s2","skill":"uv-development","via":"read","prompt":"pin that dependency in the lockfile"}
{"ts":3000,"kind":"recommend","session":"s3","stage":"cosine","prompt":"x","considered":[],"candidates":[{"id":"pickup","confidence":0.6}],"injected":[{"id":"pickup","confidence":0.6}]}
{"ts":4000,"kind":"recommend","session":"s4","stage":"cosine","prompt":"y","considered":[],"candidates":[{"id":"pickup","confidence":0.6}],"injected":[{"id":"pickup","confidence":0.6}]}
{"ts":5000,"kind":"recommend","session":"s5","stage":"cosine","prompt":"z","considered":[],"candidates":[{"id":"pickup","confidence":0.6}],"injected":[{"id":"pickup","confidence":0.6}]}
{"ts":6000,"kind":"recommend","session":"s6","stage":"rerank","prompt":"write the handoff notes","considered":[{"id":"handoff","score":-1.8}],"candidates":[],"injected":[],"abstained":"below_gate"}
{"ts":6100,"kind":"use","session":"s6","skill":"handoff","via":"skill","prompt":"write the handoff notes"}
"#;

    #[test]
    fn analyze_suggests_force_and_keywords_for_repeat_miss() {
        let report = analyze(LOG, Some(&sample_index()));
        assert_eq!(report.misses.len(), 1, "{report:?}");
        let miss = &report.misses[0];
        assert_eq!(miss.skill, "uv-development");
        assert_eq!(miss.occurrences, 2);
        // Ranked #1 on the first prompt.
        assert_eq!(miss.near_miss, 1);
        // Not in `considered` on the second prompt.
        assert_eq!(miss.absent, 1);
        // The "uv" keyword appears in the first prompt.
        assert!(miss.force_ready);
        // "dependency" and "lockfile" recur across both prompts and are not
        // already carried by the skill; "uv"/"python" are known and excluded.
        assert!(has(&miss.keywords, "dependency"), "{miss:?}");
        assert!(has(&miss.keywords, "lockfile"), "{miss:?}");
        assert!(!has(&miss.keywords, "uv"));
    }

    #[test]
    fn analyze_suggests_deny_for_never_used_repeat_fp() {
        let report = analyze(LOG, Some(&sample_index()));
        assert_eq!(report.denies.len(), 1, "{report:?}");
        let deny = &report.denies[0];
        assert_eq!(deny.skill, "pickup");
        assert_eq!(deny.fp_sessions, 3);
    }

    #[test]
    fn single_miss_goes_to_watch_not_suggestion() {
        let report = analyze(LOG, Some(&sample_index()));
        assert_eq!(report.watch, vec!["handoff".to_string()]);
    }

    #[test]
    fn deny_requires_never_used_anywhere() {
        // Same 3 unused sessions, but one other session *did* use pickup: no deny.
        let mut log = String::from(LOG);
        log.push('\n');
        log.push_str(
            r#"{"ts":7000,"kind":"use","session":"s7","skill":"pickup","via":"skill","prompt":"resume"}"#,
        );
        let report = analyze(&log, Some(&sample_index()));
        assert!(report.denies.is_empty(), "{report:?}");
    }

    #[test]
    fn analyze_without_index_still_mines_keywords() {
        // No index: force-readiness can't be checked (false) and mining is
        // unfiltered ("uv" is no longer known, so it may appear as a candidate).
        let report = analyze(LOG, None);
        let miss = &report.misses[0];
        assert!(!miss.force_ready);
        assert!(has(&miss.keywords, "dependency"));
    }

    #[test]
    fn empty_log_yields_nothing() {
        assert_eq!(analyze("", Some(&sample_index())), Suggestions::default());
    }

    #[test]
    fn mine_keywords_needs_recurrence() {
        let known = BTreeSet::new();
        // A single prompt yields nothing (no recurrence signal).
        let lone = ["one prompt only".to_string()];
        assert!(mine_keywords(&lone, &known).is_empty());
        // Tokens appearing in both prompts survive; one-off tokens don't.
        let pair = [
            "rotate the api credentials".to_string(),
            "rotate stale credentials".to_string(),
        ];
        let got = mine_keywords(&pair, &known);
        assert!(has(&got, "rotate") && has(&got, "credential"));
        assert!(!has(&got, "stale"));
    }
}