1use crate::paths;
11use std::collections::{BTreeMap, BTreeSet};
12use std::fs;
13
/// Outcome counters produced by [`aggregate`] for one telemetry log.
///
/// A skill counts as "recommended" when its id appears in the `injected` list
/// of a `recommend` event, and as "used" when it is the `skill` of a `use`
/// event. Matching is done per session on de-duplicated sets; timestamps are
/// not compared.
#[derive(Debug, Default, PartialEq)]
pub struct Stats {
    /// Number of parsed log lines whose `kind` is `"recommend"`.
    pub recommend_events: u64,
    /// Number of parsed log lines whose `kind` is `"use"` (counted even when
    /// the line has no `skill`).
    pub use_events: u64,
    /// Distinct sessions that had a recommend event or a recorded skill use.
    pub sessions: usize,
    /// Distinct (session, injected skill) pairs.
    pub recommended: u64,
    /// Injected pairs whose skill was also used in the same session.
    pub used_after_rec: u64,
    /// Injected pairs whose skill was not used in that session.
    pub false_positives: u64,
    /// (session, used skill) pairs whose skill was not injected in that session.
    pub recall_misses: u64,
    /// False positives counted per skill id.
    pub fp_by_skill: BTreeMap<String, u64>,
    /// Recall misses counted per skill id.
    pub miss_by_skill: BTreeMap<String, u64>,
}
33
/// One entry of a recommend event's `candidates` or `injected` array.
#[derive(Debug, PartialEq)]
pub struct Cand {
    /// Skill id (`id` in the log entry).
    pub id: String,
    /// `confidence` from the log entry; `0.0` when the field is missing.
    pub confidence: f32,
}
40
/// One entry of a recommend event's `considered` array.
///
/// The entry's position in that array is what [`classify`] reports as its rank.
#[derive(Debug, PartialEq)]
pub struct Ranked {
    /// Skill id (`id` in the log entry).
    pub id: String,
    /// `score` from the log entry; `0.0` when the field is missing.
    pub score: f32,
}
48
/// A parsed `recommend` line of the telemetry log (see [`recommend_events`]).
#[derive(Debug, PartialEq)]
pub struct RecEvent {
    /// `ts` from the log line, `0` when missing. The event printer compares it
    /// against `now_ms()`, i.e. treats it as milliseconds since the Unix epoch.
    pub ts: u128,
    /// Session id; empty when the line has none.
    pub session: String,
    /// `stage` string from the log line; empty when missing.
    pub stage: String,
    /// Prompt text the recommendation was made for; empty when missing.
    pub prompt: String,
    /// Entries of the `considered` array, in log order.
    pub considered: Vec<Ranked>,
    /// Entries of the `candidates` array. The event printer labels those that
    /// are not also in `injected` as "(over budget)".
    pub candidates: Vec<Cand>,
    /// Entries of the `injected` array — the skills counted as recommended.
    pub injected: Vec<Cand>,
    /// `abstained` string from the log line, if present.
    pub abstained: Option<String>,
}
68
/// A parsed `use` line of the telemetry log (see [`use_events`]).
#[derive(Debug, PartialEq)]
pub struct UseEvent {
    /// `ts` from the log line, `0` when missing (same scale as [`RecEvent::ts`]).
    pub ts: u128,
    /// Session id; empty when the line has none.
    pub session: String,
    /// Id of the skill that was used.
    pub skill: String,
    /// `via` string from the log line; empty when missing.
    pub via: String,
    /// Prompt recorded with the use; empty when the line carries none.
    pub prompt: String,
}
80
81pub fn recommend_events(log: &str) -> Vec<RecEvent> {
84 let mut out = Vec::new();
85 for line in log.lines() {
86 let line = line.trim();
87 if line.is_empty() {
88 continue;
89 }
90 let Ok(v) = serde_json::from_str::<serde_json::Value>(line) else {
91 continue;
92 };
93 if v.get("kind").and_then(|k| k.as_str()) != Some("recommend") {
94 continue;
95 }
96 out.push(RecEvent {
97 ts: v.get("ts").and_then(|t| t.as_u64()).unwrap_or(0) as u128,
98 session: str_field(&v, "session"),
99 stage: str_field(&v, "stage"),
100 prompt: str_field(&v, "prompt"),
101 considered: parse_ranked(v.get("considered")),
102 candidates: parse_cands(v.get("candidates")),
103 injected: parse_cands(v.get("injected")),
104 abstained: v
105 .get("abstained")
106 .and_then(|a| a.as_str())
107 .map(str::to_string),
108 });
109 }
110 out
111}
112
113pub fn used_by_session(log: &str) -> BTreeMap<String, BTreeSet<String>> {
116 let mut used: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
117 for line in log.lines() {
118 let Ok(v) = serde_json::from_str::<serde_json::Value>(line.trim()) else {
119 continue;
120 };
121 if v.get("kind").and_then(|k| k.as_str()) != Some("use") {
122 continue;
123 }
124 if let Some(skill) = v.get("skill").and_then(|s| s.as_str()) {
125 used.entry(str_field(&v, "session"))
126 .or_default()
127 .insert(skill.to_string());
128 }
129 }
130 used
131}
132
133pub fn use_events(log: &str) -> Vec<UseEvent> {
136 let mut out = Vec::new();
137 for line in log.lines() {
138 let Ok(v) = serde_json::from_str::<serde_json::Value>(line.trim()) else {
139 continue;
140 };
141 if v.get("kind").and_then(|k| k.as_str()) != Some("use") {
142 continue;
143 }
144 let Some(skill) = v.get("skill").and_then(|s| s.as_str()) else {
145 continue;
146 };
147 out.push(UseEvent {
148 ts: v.get("ts").and_then(|t| t.as_u64()).unwrap_or(0) as u128,
149 session: str_field(&v, "session"),
150 skill: skill.to_string(),
151 via: str_field(&v, "via"),
152 prompt: str_field(&v, "prompt"),
153 });
154 }
155 out
156}
157
158pub fn recommended_by_session(log: &str) -> BTreeMap<String, BTreeSet<String>> {
162 let mut rec: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
163 for e in recommend_events(log) {
164 let entry = rec.entry(e.session).or_default();
165 for c in e.injected {
166 entry.insert(c.id);
167 }
168 }
169 rec
170}
171
172fn str_field(v: &serde_json::Value, key: &str) -> String {
173 v.get(key)
174 .and_then(|s| s.as_str())
175 .unwrap_or("")
176 .to_string()
177}
178
179fn parse_cands(v: Option<&serde_json::Value>) -> Vec<Cand> {
180 v.and_then(|a| a.as_array())
181 .map(|arr| {
182 arr.iter()
183 .filter_map(|item| {
184 let id = item.get("id")?.as_str()?.to_string();
185 let confidence = item
186 .get("confidence")
187 .and_then(|c| c.as_f64())
188 .unwrap_or(0.0) as f32;
189 Some(Cand { id, confidence })
190 })
191 .collect()
192 })
193 .unwrap_or_default()
194}
195
196fn parse_ranked(v: Option<&serde_json::Value>) -> Vec<Ranked> {
197 v.and_then(|a| a.as_array())
198 .map(|arr| {
199 arr.iter()
200 .filter_map(|item| {
201 let id = item.get("id")?.as_str()?.to_string();
202 let score = item.get("score").and_then(|s| s.as_f64()).unwrap_or(0.0) as f32;
203 Some(Ranked { id, score })
204 })
205 .collect()
206 })
207 .unwrap_or_default()
208}
209
210pub fn aggregate(log: &str) -> Stats {
212 let mut rec: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
214 let mut used: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
215 let mut stats = Stats::default();
216
217 for line in log.lines() {
218 let line = line.trim();
219 if line.is_empty() {
220 continue;
221 }
222 let Ok(v) = serde_json::from_str::<serde_json::Value>(line) else {
223 continue;
224 };
225 let session = v.get("session").and_then(|s| s.as_str()).unwrap_or("");
226 match v.get("kind").and_then(|k| k.as_str()) {
227 Some("recommend") => {
228 stats.recommend_events += 1;
229 let entry = rec.entry(session.to_string()).or_default();
230 if let Some(arr) = v.get("injected").and_then(|i| i.as_array()) {
231 for item in arr {
232 if let Some(id) = item.get("id").and_then(|i| i.as_str()) {
233 entry.insert(id.to_string());
234 }
235 }
236 }
237 }
238 Some("use") => {
239 stats.use_events += 1;
240 if let Some(skill) = v.get("skill").and_then(|s| s.as_str()) {
241 used.entry(session.to_string())
242 .or_default()
243 .insert(skill.to_string());
244 }
245 }
246 _ => {}
247 }
248 }
249
250 let sessions: BTreeSet<&String> = rec.keys().chain(used.keys()).collect();
251 stats.sessions = sessions.len();
252
253 for session in sessions {
254 let recommended = rec.get(session).cloned().unwrap_or_default();
255 let consumed = used.get(session).cloned().unwrap_or_default();
256 for id in &recommended {
257 stats.recommended += 1;
258 if consumed.contains(id) {
259 stats.used_after_rec += 1;
260 } else {
261 stats.false_positives += 1;
262 *stats.fp_by_skill.entry(id.clone()).or_default() += 1;
263 }
264 }
265 for id in &consumed {
266 if !recommended.contains(id) {
267 stats.recall_misses += 1;
268 *stats.miss_by_skill.entry(id.clone()).or_default() += 1;
269 }
270 }
271 }
272 stats
273}
274
/// Deepest 1-based rank in a recommend event's `considered` list that
/// [`classify`] still reports as [`Verdict::NearMiss`]; anything ranked below
/// it is [`Verdict::Buried`].
const NEAR_MISS_RANK: usize = 3;
279
/// How a natively picked skill relates to the recommend event matched to it.
#[derive(Debug, PartialEq)]
pub enum Verdict {
    /// The skill was in the event's `injected` list.
    Agreed,
    /// Not injected, but present in `considered` at a 1-based `rank` no deeper
    /// than [`NEAR_MISS_RANK`]; `score` is the logged score of that entry.
    NearMiss { rank: usize, score: f32 },
    /// Not injected, and present in `considered` below [`NEAR_MISS_RANK`].
    Buried { rank: usize, score: f32 },
    /// Neither injected nor present in `considered`.
    Absent,
    /// No recommend event could be matched to the use event at all.
    NoRanking,
}
298
/// One row of the comparison report built by [`compare`]: a single `use` event
/// together with the verdict against its matched recommend event.
#[derive(Debug, PartialEq)]
pub struct CompareRow {
    /// Session of the use event.
    pub session: String,
    /// Prompt recorded on the use event.
    pub prompt: String,
    /// `stage` of the matched recommend event; empty when none matched.
    pub stage: String,
    /// Skill id taken from the use event.
    pub native: String,
    /// `via` string of the use event.
    pub via: String,
    /// Classification of the pick against the matched recommend event.
    pub verdict: Verdict,
}
311
312pub fn compare(log: &str) -> Vec<CompareRow> {
316 let recs = recommend_events(log);
317 use_events(log)
318 .into_iter()
319 .map(|u| {
320 let rec = match_recommend(&recs, &u);
321 let (stage, verdict) = match rec {
322 None => (String::new(), Verdict::NoRanking),
323 Some(r) => (r.stage.clone(), classify(r, &u.skill)),
324 };
325 CompareRow {
326 session: u.session,
327 prompt: u.prompt,
328 stage,
329 native: u.skill,
330 via: u.via,
331 verdict,
332 }
333 })
334 .collect()
335}
336
337fn match_recommend<'a>(recs: &'a [RecEvent], u: &UseEvent) -> Option<&'a RecEvent> {
341 if u.prompt.is_empty() {
342 return None;
343 }
344 let mut matches: Vec<&RecEvent> = recs
345 .iter()
346 .filter(|r| r.session == u.session && r.prompt == u.prompt)
347 .collect();
348 if matches.is_empty() {
349 return None;
350 }
351 matches.sort_by_key(|r| r.ts);
352 matches
353 .iter()
354 .rev()
355 .find(|r| r.ts <= u.ts)
356 .or_else(|| matches.first())
357 .copied()
358}
359
360fn classify(r: &RecEvent, native: &str) -> Verdict {
362 if r.injected.iter().any(|c| c.id == native) {
363 return Verdict::Agreed;
364 }
365 match r.considered.iter().position(|c| c.id == native) {
366 Some(i) => {
367 let rank = i + 1;
368 let score = r.considered[i].score;
369 if rank <= NEAR_MISS_RANK {
370 Verdict::NearMiss { rank, score }
371 } else {
372 Verdict::Buried { rank, score }
373 }
374 }
375 None => Verdict::Absent,
376 }
377}
378
379pub fn run(tail: Option<usize>, session: Option<&str>, compare_view: bool) -> anyhow::Result<()> {
384 let path = paths::telemetry_path();
385 let Ok(log) = fs::read_to_string(&path) else {
386 println!(
387 "no telemetry log at {} (enable with SKI_TELEMETRY=1)",
388 path.display()
389 );
390 return Ok(());
391 };
392 if compare_view {
393 print_compare(&log, session);
394 } else {
395 match tail {
396 Some(n) => print_events(&log, n, session),
397 None => print_aggregate(&log),
398 }
399 }
400 Ok(())
401}
402
/// Borrowed view of either event kind, so recommend and use events can share
/// one timeline and be sorted together.
enum Ev<'a> {
    /// A parsed `recommend` event.
    Rec(&'a RecEvent),
    /// A parsed `use` event.
    Use(&'a UseEvent),
}
409
410impl Ev<'_> {
411 fn ts(&self) -> u128 {
412 match self {
413 Ev::Rec(e) => e.ts,
414 Ev::Use(u) => u.ts,
415 }
416 }
417}
418
419fn print_events(log: &str, n: usize, session_filter: Option<&str>) {
422 let used = used_by_session(log);
423 let recd = recommended_by_session(log);
424 let recs = recommend_events(log);
425 let uses = use_events(log);
426 let keep = |s: &str| session_filter.is_none_or(|sf| s.contains(sf));
427
428 let mut timeline: Vec<Ev> = recs
429 .iter()
430 .filter(|e| keep(&e.session))
431 .map(Ev::Rec)
432 .chain(uses.iter().filter(|u| keep(&u.session)).map(Ev::Use))
433 .collect();
434 timeline.sort_by_key(Ev::ts);
436
437 if timeline.is_empty() {
438 println!("no events");
439 return;
440 }
441 let total = timeline.len();
442 let start = total.saturating_sub(n);
443 println!("showing {} of {total} events", total - start);
444 let now = now_ms();
445 let empty = BTreeSet::new();
446 for ev in &timeline[start..] {
447 match ev {
448 Ev::Rec(e) => {
449 let used_here = used.get(&e.session).unwrap_or(&empty);
450 let injected_ids: BTreeSet<&str> =
451 e.injected.iter().map(|c| c.id.as_str()).collect();
452 println!(
453 "\n{} session {} rec stage {}",
454 ago(e.ts, now),
455 short(&e.session),
456 if e.stage.is_empty() { "?" } else { &e.stage },
457 );
458 println!(" prompt: {}", truncate(&e.prompt, 120));
459 for c in &e.injected {
460 let mark = if used_here.contains(&c.id) {
461 "used"
462 } else {
463 "unused"
464 };
465 println!(" -> {:<26} {:.2} {mark}", c.id, c.confidence);
466 }
467 for c in &e.candidates {
469 if !injected_ids.contains(c.id.as_str()) {
470 println!(" {:<26} {:.2} (over budget)", c.id, c.confidence);
471 }
472 }
473 }
474 Ev::Use(u) => {
475 let acted = recd.get(&u.session).is_some_and(|s| s.contains(&u.skill));
476 let tag = if acted { "acted on rec" } else { "RECALL MISS" };
477 println!(
478 "\n{} session {} use {} via {} ({tag})",
479 ago(u.ts, now),
480 short(&u.session),
481 u.skill,
482 u.via,
483 );
484 if !u.prompt.is_empty() {
485 println!(" prompt: {}", truncate(&u.prompt, 120));
486 }
487 }
488 }
489 }
490}
491
492fn print_aggregate(log: &str) {
494 let s = aggregate(log);
495 println!(
496 "events: {} recommend, {} use across {} sessions",
497 s.recommend_events, s.use_events, s.sessions
498 );
499 println!(
500 "recommended: {} used-after-rec: {} ({}) false positives: {} ({})",
501 s.recommended,
502 s.used_after_rec,
503 pct(s.used_after_rec, s.recommended),
504 s.false_positives,
505 pct(s.false_positives, s.recommended),
506 );
507 println!(
508 "recall misses (used, never recommended): {}",
509 s.recall_misses
510 );
511 print_top("top false positives", &s.fp_by_skill);
512 print_top("top recall misses", &s.miss_by_skill);
513}
514
515fn print_compare(log: &str, session_filter: Option<&str>) {
519 let rows: Vec<CompareRow> = compare(log)
520 .into_iter()
521 .filter(|r| session_filter.is_none_or(|sf| r.session.contains(sf)))
522 .collect();
523 if rows.is_empty() {
524 println!("no native-chooser picks logged (need `use` events; enable SKI_TELEMETRY=1)");
525 return;
526 }
527 let sessions: BTreeSet<&str> = rows.iter().map(|r| r.session.as_str()).collect();
528 let (mut agreed, mut near, mut buried, mut absent, mut no_rank) = (0, 0, 0, 0, 0);
529 for r in &rows {
530 match r.verdict {
531 Verdict::Agreed => agreed += 1,
532 Verdict::NearMiss { .. } => near += 1,
533 Verdict::Buried { .. } => buried += 1,
534 Verdict::Absent => absent += 1,
535 Verdict::NoRanking => no_rank += 1,
536 }
537 }
538 println!(
539 "ski vs native chooser — {} picks across {} sessions",
540 rows.len(),
541 sessions.len()
542 );
543 println!(" agreed (ski injected it too): {agreed}");
544 println!(" NEAR-MISS (ski ranked it ≤{NEAR_MISS_RANK}, abstained): {near} <- tunable edge");
545 println!(" buried (ranked deeper, abstained): {buried}");
546 println!(" absent (ski never surfaced it): {absent} <- retrieval ceiling");
547 if no_rank > 0 {
548 println!(" no ranking logged for the prompt: {no_rank}");
549 }
550 println!(
551 "\nnote: native picks are observed *after* ski injects, so \"agreed\" doesn't prove ski\n\
552 caused it. The clean edge signal is NEAR-MISS/buried — the model found a skill itself\n\
553 that ski ranked but gated out."
554 );
555
556 let mut edges: Vec<(usize, f32, &CompareRow)> = rows
558 .iter()
559 .filter_map(|r| match r.verdict {
560 Verdict::NearMiss { rank, score } | Verdict::Buried { rank, score } => {
561 Some((rank, score, r))
562 }
563 _ => None,
564 })
565 .collect();
566 edges.sort_by(|a, b| a.0.cmp(&b.0).then(a.2.prompt.cmp(&b.2.prompt)));
567 if !edges.is_empty() {
568 println!("\nedge candidates (native picked it, ski abstained):");
569 for (rank, score, r) in &edges {
570 println!(
571 " {:<60} -> {} via {} ski #{rank} score {score:.3} [{}]",
572 truncate(&r.prompt, 60),
573 r.native,
574 r.via,
575 if r.stage.is_empty() { "?" } else { &r.stage },
576 );
577 }
578 }
579
580 let absent_rows: Vec<&CompareRow> = rows
581 .iter()
582 .filter(|r| r.verdict == Verdict::Absent)
583 .collect();
584 if !absent_rows.is_empty() {
585 println!("\nabsent (ski never surfaced — retrieval miss):");
586 for r in &absent_rows {
587 println!(
588 " {:<60} -> {} via {}",
589 truncate(&r.prompt, 60),
590 r.native,
591 r.via
592 );
593 }
594 }
595}
596
597pub fn clear(telemetry: bool) -> anyhow::Result<()> {
599 let sessions = paths::sessions_dir();
600 let removed = match fs::remove_dir_all(&sessions) {
601 Ok(()) => true,
602 Err(e) if e.kind() == std::io::ErrorKind::NotFound => false,
603 Err(e) => return Err(e.into()),
604 };
605 println!(
606 "{} session state at {}",
607 if removed { "cleared" } else { "no" },
608 sessions.display()
609 );
610 if telemetry {
611 let log = paths::telemetry_path();
612 match fs::remove_file(&log) {
613 Ok(()) => println!("cleared telemetry log at {}", log.display()),
614 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
615 Err(e) => return Err(e.into()),
616 }
617 }
618 Ok(())
619}
620
/// Current wall-clock time in milliseconds since the Unix epoch, or `0` if the
/// system clock reports a time before the epoch.
fn now_ms() -> u128 {
    use std::time::{SystemTime, UNIX_EPOCH};

    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_millis(),
        Err(_) => 0,
    }
}
627
/// Formats the distance between `ts_ms` and `now_ms` (both in milliseconds) as
/// a coarse "N{s,m,h,d} ago" label, rounding down to the largest fitting unit.
///
/// Returns `"?"` when the timestamp is `0` (unknown) or lies after `now_ms`.
fn ago(ts_ms: u128, now_ms: u128) -> String {
    let secs = match now_ms.checked_sub(ts_ms) {
        Some(elapsed_ms) if ts_ms != 0 => elapsed_ms / 1000,
        _ => return "?".to_string(),
    };
    match secs {
        0..=59 => format!("{secs}s ago"),
        60..=3_599 => format!("{}m ago", secs / 60),
        3_600..=86_399 => format!("{}h ago", secs / 3600),
        _ => format!("{}d ago", secs / 86_400),
    }
}
644
/// Returns the first eight characters of `session` (the whole id when it is
/// shorter), for compact display.
fn short(session: &str) -> String {
    match session.char_indices().nth(8) {
        // Byte offset of the ninth character marks the end of the prefix.
        Some((end, _)) => session[..end].to_string(),
        None => session.to_string(),
    }
}
649
/// Flattens `s` onto one line (each `\n` becomes a space) and caps it at `max`
/// characters, appending `…` when anything was cut off.
pub(crate) fn truncate(s: &str, max: usize) -> String {
    let flat = s.replace('\n', " ");
    // A character at index `max` exists exactly when the text is too long.
    match flat.char_indices().nth(max) {
        None => flat,
        Some((end, _)) => {
            let cut = &flat[..end];
            format!("{cut}…")
        }
    }
}
660
/// Formats `n / d` as a whole-number percentage such as `"50%"`.
///
/// A zero denominator yields `"0%"` rather than dividing by zero.
fn pct(n: u64, d: u64) -> String {
    let share = match d {
        0 => return "0%".to_string(),
        _ => 100.0 * n as f64 / d as f64,
    };
    format!("{:.0}%", share)
}
668
/// Prints `label` followed by up to eight skills from `by_skill`, highest count
/// first and ties ordered by skill id. Prints nothing for an empty map.
fn print_top(label: &str, by_skill: &BTreeMap<String, u64>) {
    if by_skill.is_empty() {
        return;
    }
    let mut ranked: Vec<(&String, &u64)> = by_skill.iter().collect();
    ranked.sort_by_key(|&(id, n)| (std::cmp::Reverse(*n), id));
    let shown = ranked
        .iter()
        .take(8)
        .map(|(id, n)| format!("{id} ×{n}"))
        .collect::<Vec<String>>()
        .join(", ");
    println!("{label}: {}", shown);
}
683
#[cfg(test)]
mod tests {
    use super::*;

    // Two sessions, one malformed line, and `pdf` recommended twice in `s2`
    // so that de-duplication per session is exercised.
    const LOG: &str = r#"
{"kind":"recommend","session":"s1","injected":[{"id":"git-attribution","confidence":0.9},{"id":"pdf","confidence":0.6}]}
{"kind":"use","session":"s1","skill":"git-attribution","via":"skill"}
{"kind":"use","session":"s1","skill":"xlsx","via":"read"}
{"kind":"recommend","session":"s2","injected":[{"id":"pdf","confidence":0.7}]}
not json, should be skipped
{"kind":"recommend","session":"s2","injected":[{"id":"pdf","confidence":0.8}]}
"#;

    #[test]
    fn aggregate_counts_outcomes() {
        let s = aggregate(LOG);
        assert_eq!(s.recommend_events, 3);
        assert_eq!(s.use_events, 2);
        assert_eq!(s.sessions, 2);
        // Distinct (session, skill) pairs: s1/git-attribution, s1/pdf, s2/pdf.
        assert_eq!(s.recommended, 3);
        // Only git-attribution in s1 was both injected and used.
        assert_eq!(s.used_after_rec, 1);
        // pdf was injected in s1 and in s2 but used in neither.
        assert_eq!(s.false_positives, 2);
        assert_eq!(s.fp_by_skill.get("pdf"), Some(&2));
        // xlsx was used in s1 without having been injected there.
        assert_eq!(s.recall_misses, 1);
        assert_eq!(s.miss_by_skill.get("xlsx"), Some(&1));
    }

    #[test]
    fn empty_log_is_zero() {
        assert_eq!(aggregate(""), Stats::default());
    }

    #[test]
    fn pct_guards_zero() {
        assert_eq!(pct(0, 0), "0%");
        assert_eq!(pct(1, 2), "50%");
    }

    // Events carrying the detail fields (ts, stage, prompt, candidates), plus a
    // garbage line that must be ignored.
    const DETAIL_LOG: &str = r#"
{"ts":1000,"kind":"recommend","session":"sess-abcdef-1","stage":"cosine","prompt":"make a pdf","candidates":[{"id":"pdf","confidence":0.8},{"id":"docx","confidence":0.4}],"injected":[{"id":"pdf","confidence":0.8}]}
{"ts":2000,"kind":"use","session":"sess-abcdef-1","skill":"pdf","via":"skill"}
{"ts":3000,"kind":"recommend","session":"other-2","stage":"rerank","prompt":"line1\nline2","candidates":[{"id":"xlsx","confidence":0.5}],"injected":[{"id":"xlsx","confidence":0.5}]}
garbage
"#;

    #[test]
    fn recommend_events_parses_detail() {
        let evs = recommend_events(DETAIL_LOG);
        assert_eq!(evs.len(), 2);
        let first = &evs[0];
        assert_eq!(first.ts, 1000);
        assert_eq!(first.session, "sess-abcdef-1");
        assert_eq!(first.stage, "cosine");
        assert_eq!(first.prompt, "make a pdf");
        assert_eq!(first.candidates.len(), 2);
        assert_eq!(
            first.injected,
            vec![Cand {
                id: "pdf".into(),
                confidence: 0.8
            }]
        );
    }

    #[test]
    fn used_by_session_collects_use_events() {
        let used = used_by_session(DETAIL_LOG);
        assert!(used.get("sess-abcdef-1").unwrap().contains("pdf"));
        // `other-2` only has a recommend event, so it has no entry at all.
        assert!(!used.contains_key("other-2"));
    }

    #[test]
    fn use_events_parse_prompt_when_present() {
        let log = r#"
{"ts":5000,"kind":"use","session":"s1","skill":"xlsx","via":"read","prompt":"clean this csv"}
{"ts":6000,"kind":"use","session":"s1","skill":"pdf","via":"skill"}
"#;
        let evs = use_events(log);
        assert_eq!(evs.len(), 2);
        assert_eq!(evs[0].skill, "xlsx");
        assert_eq!(evs[0].via, "read");
        assert_eq!(evs[0].prompt, "clean this csv");
        // A use line without a prompt parses to an empty prompt.
        assert_eq!(evs[1].prompt, "");
    }

    #[test]
    fn recommended_by_session_uses_injected_ids() {
        let recd = recommended_by_session(DETAIL_LOG);
        assert!(recd.get("sess-abcdef-1").unwrap().contains("pdf"));
        // docx was only a candidate, never injected.
        assert!(!recd.get("sess-abcdef-1").unwrap().contains("docx"));
        assert!(recd.get("other-2").unwrap().contains("xlsx"));
    }

    #[test]
    fn ago_buckets() {
        // Unknown (zero) and future timestamps have no meaningful age.
        assert_eq!(ago(0, 10_000), "?");
        assert_eq!(ago(20_000, 10_000), "?");
        assert_eq!(ago(9_000, 10_000), "1s ago");
        assert_eq!(ago(0, 120_000 + 1), "?");
        assert_eq!(ago(1_000, 121_000), "2m ago");
        assert_eq!(ago(1_000, 7_201_000), "2h ago");
        assert_eq!(ago(1_000, 172_801_000), "2d ago");
    }

    #[test]
    fn truncate_caps_and_flattens() {
        assert_eq!(truncate("a\nb", 10), "a b");
        assert_eq!(truncate("abcdef", 3), "abc…");
        assert_eq!(short("sess-abcdef-1"), "sess-abc");
    }

    #[test]
    fn recommend_events_parse_considered_and_abstained() {
        let log = r#"
{"ts":1,"kind":"recommend","session":"s","stage":"rerank","prompt":"p","considered":[{"id":"xlsx","score":-1.96},{"id":"pdf","score":-2.1}],"candidates":[],"injected":[],"abstained":"below_gate"}
"#;
        let evs = recommend_events(log);
        assert_eq!(evs.len(), 1);
        assert_eq!(
            evs[0].considered,
            vec![
                Ranked {
                    id: "xlsx".into(),
                    score: -1.96
                },
                Ranked {
                    id: "pdf".into(),
                    score: -2.1
                },
            ]
        );
        assert_eq!(evs[0].abstained.as_deref(), Some("below_gate"));
        // A line without `considered`/`abstained` still parses, with empty
        // defaults for both.
        let legacy =
            r#"{"kind":"recommend","session":"s","injected":[{"id":"pdf","confidence":0.7}]}"#;
        let ev = &recommend_events(legacy)[0];
        assert!(ev.considered.is_empty());
        assert_eq!(ev.abstained, None);
    }

    // One session per verdict: s1 near miss, s2 agreed, s3 buried, s4 absent,
    // s5 a use event with no recommend event at all.
    const COMPARE_LOG: &str = r#"
{"ts":1000,"kind":"recommend","session":"s1","stage":"rerank","prompt":"make a chart","considered":[{"id":"xlsx","score":-1.9},{"id":"pdf","score":-2.1}],"candidates":[],"injected":[],"abstained":"below_gate"}
{"ts":1100,"kind":"use","session":"s1","skill":"xlsx","via":"skill","prompt":"make a chart"}
{"ts":2000,"kind":"recommend","session":"s2","stage":"cosine","prompt":"set up python","considered":[{"id":"uv-setup","score":0.7}],"candidates":[{"id":"uv-setup","confidence":0.7}],"injected":[{"id":"uv-setup","confidence":0.7}]}
{"ts":2100,"kind":"use","session":"s2","skill":"uv-setup","via":"skill","prompt":"set up python"}
{"ts":3000,"kind":"recommend","session":"s3","stage":"rerank","prompt":"deep","considered":[{"id":"a","score":0.1},{"id":"b","score":0.1},{"id":"c","score":0.1},{"id":"d","score":0.1},{"id":"gold","score":0.0}],"candidates":[],"injected":[],"abstained":"below_gate"}
{"ts":3100,"kind":"use","session":"s3","skill":"gold","via":"read","prompt":"deep"}
{"ts":4000,"kind":"recommend","session":"s4","stage":"cosine","prompt":"weird","considered":[{"id":"x","score":0.2}],"candidates":[],"injected":[],"abstained":"below_gate"}
{"ts":4100,"kind":"use","session":"s4","skill":"notranked","via":"skill","prompt":"weird"}
{"ts":5100,"kind":"use","session":"s5","skill":"orphan","via":"skill","prompt":"no rec here"}
"#;

    #[test]
    fn compare_classifies_each_verdict() {
        let rows = compare(COMPARE_LOG);
        // Index the verdicts by the natively picked skill for readable asserts.
        let by: std::collections::HashMap<&str, &Verdict> = rows
            .iter()
            .map(|r| (r.native.as_str(), &r.verdict))
            .collect();
        assert_eq!(
            by["xlsx"],
            &Verdict::NearMiss {
                rank: 1,
                score: -1.9
            }
        );
        assert_eq!(by["uv-setup"], &Verdict::Agreed);
        assert_eq!(
            by["gold"],
            &Verdict::Buried {
                rank: 5,
                score: 0.0
            }
        );
        assert_eq!(by["notranked"], &Verdict::Absent);
        assert_eq!(by["orphan"], &Verdict::NoRanking);
    }

    #[test]
    fn classify_near_miss_rank_boundary() {
        let log = r#"
{"ts":1,"kind":"recommend","session":"s","stage":"rerank","prompt":"p","considered":[{"id":"a","score":0.3},{"id":"b","score":0.2},{"id":"gold","score":0.1},{"id":"d","score":0.0}],"candidates":[],"injected":[],"abstained":"below_gate"}
"#;
        let ev = &recommend_events(log)[0];
        // Rank 3 is the last rank that still counts as a near miss.
        assert_eq!(
            classify(ev, "gold"),
            Verdict::NearMiss {
                rank: 3,
                score: 0.1
            }
        );
        // Rank 4 is the first rank reported as buried.
        assert_eq!(
            classify(ev, "d"),
            Verdict::Buried {
                rank: 4,
                score: 0.0
            }
        );
    }

    #[test]
    fn match_recommend_prefers_latest_at_or_before_use() {
        let log = r#"
{"ts":1000,"kind":"recommend","session":"s","stage":"cosine","prompt":"p","considered":[{"id":"a","score":0.5}],"candidates":[],"injected":[],"abstained":"below_gate"}
{"ts":3000,"kind":"recommend","session":"s","stage":"cosine","prompt":"p","considered":[{"id":"a","score":0.9}],"candidates":[],"injected":[],"abstained":"below_gate"}
"#;
        let recs = recommend_events(log);
        let u = UseEvent {
            ts: 2000,
            session: "s".into(),
            skill: "a".into(),
            via: "skill".into(),
            prompt: "p".into(),
        };
        // The ts=3000 event happened after the use, so the ts=1000 one is chosen.
        assert_eq!(match_recommend(&recs, &u).unwrap().ts, 1000);
    }
}