Skip to main content

ski/
session.rs

//! Per-conversation state: which skills are already in context, and at what
//! confidence we last recommended them, so dedup can be *score-aware* rather than
//! "seen once, suppressed forever".
//!
//! A skill is "loaded" either because **we** recommended it ([`Source::Ski`],
//! with the confidence we showed) or because the **model** pulled it itself
//! ([`Source::Model`], recorded by `ski observe`). The two are treated
//! differently by [`Session::should_recommend`]:
//! - **used** (`Model`) — never recommend again.
//! - **recommended, unused** (`Ski`) — re-recommend only once it newly reaches
//!   HIGH confidence (we get one stronger nudge; after a HIGH showing, never).
//!
//! All reads fail open: a missing or corrupt state file yields an empty session
//! rather than an error, so the hot path can never be blocked by bad state.
15
16use serde::de::Deserializer;
17use serde::{Deserialize, Serialize};
18use std::collections::BTreeMap;
19use std::fs;
20use std::path::Path;
21use std::time::{SystemTime, UNIX_EPOCH};
22
23#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
24#[serde(rename_all = "lowercase")]
25pub enum Source {
26    /// We recommended this skill.
27    Ski,
28    /// The model loaded this skill on its own.
29    Model,
30}
31
32/// What we know about a skill already in context: who put it there, and (for a
33/// `Ski` recommendation) the confidence we displayed. `Model` loads carry the
34/// last confidence we'd shown, or `0.0` if we never recommended it.
35#[derive(Clone, Copy, Debug, PartialEq, Serialize)]
36pub struct Record {
37    pub source: Source,
38    pub confidence: f32,
39}
40
41// Backward-compatible read: an older state file stored each value as a bare
42// `"ski"`/`"model"` string. Accept either that (confidence 0) or the current
43// `{source, confidence}` object, so an in-flight session survives an upgrade.
44impl<'de> Deserialize<'de> for Record {
45    fn deserialize<D: Deserializer<'de>>(d: D) -> Result<Self, D::Error> {
46        #[derive(Deserialize)]
47        #[serde(untagged)]
48        enum Repr {
49            Bare(Source),
50            Full {
51                source: Source,
52                #[serde(default)]
53                confidence: f32,
54            },
55        }
56        Ok(match Repr::deserialize(d)? {
57            Repr::Bare(source) => Record {
58                source,
59                confidence: 0.0,
60            },
61            Repr::Full { source, confidence } => Record { source, confidence },
62        })
63    }
64}
65
66#[derive(Clone, Debug, Default, Serialize, Deserialize)]
67pub struct Session {
68    /// skill id -> how it got into context (and at what confidence).
69    #[serde(default)]
70    pub loaded: BTreeMap<String, Record>,
71    /// The most recent user prompt in this conversation. Stashed by the hook
72    /// **only when telemetry is on**, so a later self-load seen by `ski observe`
73    /// (a recall miss — the model loaded a skill we never recommended) can be
74    /// tied back to the prompt that was active. Empty otherwise; never serialized
75    /// when empty, so the non-telemetry hot path leaves the file unchanged.
76    #[serde(default, skip_serializing_if = "String::is_empty")]
77    pub last_prompt: String,
78    /// Recent user prompts in this conversation, oldest-first, bounded. Drives
79    /// query-side context enrichment: a vague follow-up ("now do the other one")
80    /// is disambiguated by the turns that preceded it. Maintained only when the
81    /// context feature is enabled (`Config::context_depth > 0`), so the default
82    /// hot path neither writes nor carries it. `#[serde(default)]` + skip-when-empty
83    /// keeps it invisible to indexes/sessions written before it existed.
84    #[serde(default, skip_serializing_if = "Vec::is_empty")]
85    pub recent_prompts: Vec<String>,
86    /// Unix seconds of the last write (diagnostics only).
87    #[serde(default)]
88    pub updated: u64,
89}
90
91impl Session {
92    /// Load state for a session, or an empty session if the file is missing or
93    /// unreadable. Never errors.
94    pub fn load(path: &Path) -> Session {
95        fs::read_to_string(path)
96            .ok()
97            .and_then(|s| serde_json::from_str(&s).ok())
98            .unwrap_or_default()
99    }
100
101    /// Persist state, stamping `updated`. Best-effort; callers in the hot path
102    /// should ignore the result so state IO can't block a prompt.
103    ///
104    /// Writes a per-process temp file then atomically renames it over the target,
105    /// so a concurrent reader (another hook/observe process sharing the
106    /// `session_id`) never observes a half-written file — a torn read used to
107    /// silently reset the session and re-arm dedup. The lost-update window — two
108    /// writers racing the load→mutate→save and one dropping the other's mark —
109    /// remains; it costs at most a missed dedup (a re-injection), never
110    /// corruption, and closing it would need an advisory lock.
111    pub fn save(&self, path: &Path) -> anyhow::Result<()> {
112        if let Some(parent) = path.parent() {
113            fs::create_dir_all(parent)?;
114        }
115        let mut snapshot = self.clone();
116        snapshot.updated = now_secs();
117        let json = serde_json::to_string_pretty(&snapshot)?;
118        let tmp = path.with_extension(format!("tmp.{}.{}", std::process::id(), now_nanos()));
119        fs::write(&tmp, json)?;
120        if let Err(e) = fs::rename(&tmp, path) {
121            let _ = fs::remove_file(&tmp);
122            return Err(e.into());
123        }
124        Ok(())
125    }
126
127    pub fn is_loaded(&self, id: &str) -> bool {
128        self.loaded.contains_key(id)
129    }
130
131    pub fn get(&self, id: &str) -> Option<&Record> {
132        self.loaded.get(id)
133    }
134
135    /// Persist like [`save`](Self::save), but first merge the `loaded` ledger
136    /// with whatever is on disk *now*, so a mark written by a concurrent process
137    /// survives. The hook loads its session snapshot, then spends the better part
138    /// of a second embedding/reranking before saving — ample time for `ski
139    /// observe` to record a model self-load that a plain save would overwrite
140    /// (the lost mark re-arms dedup and the skill gets re-injected).
141    ///
142    /// Merge rules, per skill id (dedup-safety errs toward suppression):
143    /// - present only on disk → kept (that's the concurrent writer's mark);
144    /// - `Model` beats `Ski` regardless of side (a used skill stays used);
145    /// - both `Ski` → the higher recorded confidence wins (matches
146    ///   [`should_recommend`](Self::should_recommend)'s "no repeat after a HIGH
147    ///   showing").
148    ///
149    /// Prompt fields (`last_prompt`, `recent_prompts`) are taken from `self`:
150    /// the hook is their only writer, and there is at most one hook per prompt.
151    /// Callers that intentionally *wipe* state (the compaction re-arm) must use
152    /// the plain [`save`](Self::save), or the merge would resurrect the ledger.
153    ///
154    /// The load→rename window still exists but shrinks from the whole hook
155    /// runtime to microseconds; closing it fully would need an advisory lock.
156    pub fn save_merged(&self, path: &Path) -> anyhow::Result<()> {
157        let disk = Session::load(path);
158        let mut merged = self.clone();
159        for (id, theirs) in disk.loaded {
160            match merged.loaded.get(&id) {
161                None => {
162                    merged.loaded.insert(id, theirs);
163                }
164                Some(ours) => {
165                    let take_theirs = match (theirs.source, ours.source) {
166                        (Source::Model, Source::Ski) => true,
167                        (Source::Ski, Source::Model) => false,
168                        _ => theirs.confidence > ours.confidence,
169                    };
170                    if take_theirs {
171                        merged.loaded.insert(id, theirs);
172                    }
173                }
174            }
175        }
176        merged.save(path)
177    }
178
179    /// Whether `id` should be recommended now, at `new_conf`, given what we
180    /// already know. The two dedup rules:
181    /// - a **used** skill (`Source::Model`) is never recommended again;
182    /// - a **recommended-but-unused** skill (`Source::Ski`) is re-recommended
183    ///   only when it newly reaches `high` confidence (it was shown below `high`
184    ///   before — a clearer prompt earns one stronger nudge; after a HIGH
185    ///   showing, never).
186    pub fn should_recommend(&self, id: &str, new_conf: f32, high: f32) -> bool {
187        match self.loaded.get(id) {
188            None => true,
189            Some(r) if r.source == Source::Model => false,
190            Some(r) => new_conf >= high && r.confidence < high,
191        }
192    }
193
194    /// Record that we recommended `id` at `confidence`. Stores the confidence we
195    /// just showed (so the next-turn `should_recommend` test is accurate), but
196    /// never downgrades a `Model` load — once the model used a skill it stays
197    /// used.
198    pub fn mark_recommended(&mut self, id: &str, confidence: f32) {
199        match self.loaded.get(id) {
200            Some(r) if r.source == Source::Model => {}
201            _ => {
202                self.loaded.insert(
203                    id.to_string(),
204                    Record {
205                        source: Source::Ski,
206                        confidence,
207                    },
208                );
209            }
210        }
211    }
212
213    /// Record that the model loaded `id` itself. Always wins (the strongest
214    /// signal); keeps any confidence we'd previously shown for diagnostics.
215    pub fn mark_used(&mut self, id: &str) {
216        let confidence = self.loaded.get(id).map(|r| r.confidence).unwrap_or(0.0);
217        self.loaded.insert(
218            id.to_string(),
219            Record {
220                source: Source::Model,
221                confidence,
222            },
223        );
224    }
225
226    /// Generic mark, kept for callers/tests that don't carry a confidence:
227    /// `Model` via [`mark_used`], `Ski` as a confidence-0 first sighting that
228    /// never overwrites an existing entry.
229    pub fn mark(&mut self, id: &str, source: Source) {
230        match source {
231            Source::Model => self.mark_used(id),
232            Source::Ski => {
233                self.loaded.entry(id.to_string()).or_insert(Record {
234                    source: Source::Ski,
235                    confidence: 0.0,
236                });
237            }
238        }
239    }
240
241    /// Append `prompt` to the rolling context window, keeping at most `max` of the
242    /// most recent prompts (oldest dropped first). A blank prompt, or one identical
243    /// to the immediately previous entry (a resubmit), is ignored so the window
244    /// holds distinct conversational turns. `max == 0` disables the window entirely
245    /// (the feature-off path).
246    pub fn push_prompt(&mut self, prompt: &str, max: usize) {
247        let p = prompt.trim();
248        if max == 0 || p.is_empty() {
249            return;
250        }
251        if self.recent_prompts.last().map(String::as_str) == Some(p) {
252            return;
253        }
254        self.recent_prompts.push(p.to_string());
255        let len = self.recent_prompts.len();
256        if len > max {
257            self.recent_prompts.drain(0..len - max);
258        }
259    }
260
261    /// Forget everything — used to re-arm on compaction so skills can be
262    /// re-injected into the fresh summary.
263    pub fn clear(&mut self) {
264        self.loaded.clear();
265        self.recent_prompts.clear();
266    }
267}
268
/// Current wall-clock time as whole seconds since the Unix epoch, or `0` when
/// the system clock reads earlier than the epoch.
fn now_secs() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
275
/// Nanoseconds since the Unix epoch (`0` if the clock reads before the epoch).
/// Its only job is to make the atomic-write temp path unique per writer, so
/// that two concurrent saves can't collide on the same temp file.
fn now_nanos() -> u128 {
    let since_epoch = SystemTime::now().duration_since(UNIX_EPOCH);
    since_epoch.map_or(0, |elapsed| elapsed.as_nanos())
}
284
#[cfg(test)]
mod tests {
    use super::*;

    /// Path of a scratch directory (not created yet) that is unique to this
    /// process and instant, named `<prefix>-<pid>-<nanos>` under the system
    /// temp dir.
    fn scratch_dir(prefix: &str) -> std::path::PathBuf {
        std::env::temp_dir().join(format!(
            "{}-{}-{}",
            prefix,
            std::process::id(),
            now_nanos()
        ))
    }

    // ---- in-memory ledger rules -------------------------------------------

    #[test]
    fn mark_and_dedup() {
        let mut session = Session::default();
        assert!(!session.is_loaded("a"));
        session.mark("a", Source::Ski);
        assert!(session.is_loaded("a"));
    }

    #[test]
    fn model_load_is_not_downgraded() {
        let mut session = Session::default();
        session.mark("a", Source::Model);
        // A later self-inject must not overwrite the model's load.
        session.mark("a", Source::Ski);
        assert_eq!(session.loaded["a"].source, Source::Model);
    }

    #[test]
    fn ski_then_model_upgrades() {
        let mut session = Session::default();
        session.mark("a", Source::Ski);
        session.mark("a", Source::Model);
        assert_eq!(session.loaded["a"].source, Source::Model);
    }

    #[test]
    fn used_skill_is_never_recommended() {
        let mut session = Session::default();
        session.mark_used("a");
        // Not even a maxed-out confidence brings a used skill back.
        assert!(!session.should_recommend("a", 1.0, 0.80));
    }

    #[test]
    fn unseen_skill_is_recommended() {
        let session = Session::default();
        // Never seen: recommended whatever the confidence.
        assert!(session.should_recommend("a", 0.40, 0.80));
    }

    #[test]
    fn repeat_only_on_rise_into_high() {
        let mut session = Session::default();
        // First shown at medium confidence.
        session.mark_recommended("a", 0.60);
        // Still below high: no repeat.
        assert!(!session.should_recommend("a", 0.70, 0.80));
        // Newly high: exactly one stronger nudge is allowed.
        assert!(session.should_recommend("a", 0.90, 0.80));
    }

    #[test]
    fn no_repeat_after_high_showing() {
        let mut session = Session::default();
        // Already shown at high confidence...
        session.mark_recommended("a", 0.90);
        // ...so an even higher score stays suppressed.
        assert!(!session.should_recommend("a", 0.95, 0.80));
    }

    #[test]
    fn mark_recommended_does_not_downgrade_model() {
        let mut session = Session::default();
        session.mark_used("a");
        session.mark_recommended("a", 0.99);
        assert_eq!(session.loaded["a"].source, Source::Model);
    }

    #[test]
    fn legacy_bare_string_value_still_loads() {
        // On-disk format from before confidences existed: each value is just
        // the source string.
        let legacy = r#"{"loaded":{"a":"ski","b":"model"},"updated":0}"#;
        let session: Session = serde_json::from_str(legacy).unwrap();
        assert_eq!(session.loaded["a"].source, Source::Ski);
        assert_eq!(session.loaded["a"].confidence, 0.0);
        assert_eq!(session.loaded["b"].source, Source::Model);
    }

    #[test]
    fn clear_re_arms() {
        let mut session = Session::default();
        session.mark("a", Source::Ski);
        session.push_prompt("set up pytest", 3);
        session.clear();
        assert!(!session.is_loaded("a"));
        // The prompt window is re-armed as well.
        assert!(session.recent_prompts.is_empty());
    }

    // ---- rolling prompt window --------------------------------------------

    #[test]
    fn push_prompt_bounds_window_oldest_first() {
        let mut session = Session::default();
        ["one", "two", "three", "four"]
            .iter()
            .for_each(|prompt| session.push_prompt(prompt, 3));
        // Capped at three entries: "one" fell off, order is preserved.
        assert_eq!(session.recent_prompts, ["two", "three", "four"]);
    }

    #[test]
    fn push_prompt_ignores_blank_and_consecutive_dupes() {
        let mut session = Session::default();
        // Blank input is dropped.
        session.push_prompt("  ", 3);
        session.push_prompt("set up pytest", 3);
        // An immediate resubmit is dropped too.
        session.push_prompt("set up pytest", 3);
        session.push_prompt("now the other one", 3);
        assert_eq!(
            session.recent_prompts,
            ["set up pytest", "now the other one"]
        );
    }

    #[test]
    fn push_prompt_zero_max_disables_window() {
        let mut session = Session::default();
        session.push_prompt("anything", 0);
        // Feature off: nothing is ever recorded.
        assert!(session.recent_prompts.is_empty());
    }

    // ---- serialized form ---------------------------------------------------

    #[test]
    fn recent_prompts_absent_when_empty_in_json() {
        // `skip_serializing_if` must keep the field out of the on-disk form on
        // the default (feature-off) path so existing readers/writers are
        // unaffected.
        let json = serde_json::to_string(&Session::default()).unwrap();
        assert!(!json.contains("recent_prompts"), "got {json}");
    }

    #[test]
    fn source_serializes_lowercase() {
        let ski = serde_json::to_string(&Source::Ski).unwrap();
        let model = serde_json::to_string(&Source::Model).unwrap();
        assert_eq!(ski, "\"ski\"");
        assert_eq!(model, "\"model\"");
    }

    #[test]
    fn roundtrip_through_json() {
        let mut session = Session::default();
        session.mark("git-attribution", Source::Ski);
        session.mark("uv-setup", Source::Model);
        let text = serde_json::to_string(&session).unwrap();
        let back: Session = serde_json::from_str(&text).unwrap();
        assert_eq!(back.loaded["git-attribution"].source, Source::Ski);
        assert_eq!(back.loaded["uv-setup"].source, Source::Model);
    }

    // ---- file IO -------------------------------------------------------------

    #[test]
    fn missing_file_is_empty_session() {
        let session = Session::load(Path::new("/nonexistent/ski/session.json"));
        assert!(session.loaded.is_empty());
    }

    #[test]
    fn save_then_load_roundtrips_and_leaves_no_temp() {
        let dir = scratch_dir("ski-session-save");
        let path = dir.join("conv.json");
        let mut session = Session::default();
        session.mark("uv-setup", Source::Ski);
        session.save(&path).unwrap();

        let back = Session::load(&path);
        assert_eq!(back.loaded["uv-setup"].source, Source::Ski);
        // The atomic rename must consume the temp file: only the target may
        // remain in the directory.
        let mut leftovers = Vec::new();
        for entry in fs::read_dir(&dir).unwrap().filter_map(Result::ok) {
            let name = entry.file_name();
            if name != "conv.json" {
                leftovers.push(name);
            }
        }
        assert!(leftovers.is_empty(), "temp file left behind: {leftovers:?}");
        let _ = fs::remove_dir_all(&dir);
    }

    #[test]
    fn save_merged_keeps_concurrent_writers_mark() {
        // The lost-update race (C2b), step by step: the hook loads its
        // snapshot, a concurrent `observe` then records a model self-load, and
        // only afterwards does the hook save. A plain save would drop the
        // observe mark (re-arming dedup for a skill the model already used);
        // save_merged has to keep it.
        let dir = scratch_dir("ski-session-merge");
        let path = dir.join("conv.json");

        // Hook reads the (still empty) state first.
        let mut hook = Session::load(&path);

        // The concurrent observe lands its mark before the hook writes.
        let mut observe = Session::load(&path);
        observe.mark_used("xlsx");
        observe.save(&path).unwrap();

        // Hook now writes from its stale snapshot.
        hook.mark_recommended("pdf", 0.9);
        hook.save_merged(&path).unwrap();

        let merged = Session::load(&path);
        assert_eq!(merged.loaded["xlsx"].source, Source::Model, "mark lost");
        assert_eq!(merged.loaded["pdf"].source, Source::Ski);
        let _ = fs::remove_dir_all(&dir);
    }

    #[test]
    fn save_merged_model_beats_ski_and_max_confidence_wins() {
        let dir = scratch_dir("ski-session-merge2");
        let path = dir.join("conv.json");

        // On disk: `a` was used, `b` was recommended at HIGH.
        let mut on_disk = Session::default();
        on_disk.mark_used("a");
        on_disk.mark_recommended("b", 0.9);
        on_disk.save(&path).unwrap();

        // In memory: `a` merely recommended (must stay Model after the merge);
        // `b` shown lower (the HIGH record must survive so should_recommend
        // keeps suppressing).
        let mut in_memory = Session::default();
        in_memory.mark_recommended("a", 0.99);
        in_memory.mark_recommended("b", 0.6);
        in_memory.save_merged(&path).unwrap();

        let merged = Session::load(&path);
        assert_eq!(merged.loaded["a"].source, Source::Model);
        assert!(merged.loaded["b"].confidence > 0.8);
        let _ = fs::remove_dir_all(&dir);
    }

    #[test]
    fn plain_save_still_wipes_for_compaction() {
        // The compaction re-arm clears on purpose and must NOT merge old marks
        // back in (which is why session_start uses save, not save_merged).
        let dir = scratch_dir("ski-session-wipe");
        let path = dir.join("conv.json");
        let mut before = Session::default();
        before.mark_used("a");
        before.save(&path).unwrap();

        let mut rearmed = Session::load(&path);
        rearmed.clear();
        rearmed.save(&path).unwrap();
        assert!(Session::load(&path).loaded.is_empty());
        let _ = fs::remove_dir_all(&dir);
    }
}