1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
//! UsageProfile — what the user *actually does*, aggregated from the
//! outcome ledger (Phase B1).
//!
//! The old concierge triggered on "is a lane empty?". This turns the
//! durable per-call receipts (`OutcomeLedgerEntry`) into per-lane
//! evidence — activity, success, latency, and friction — so the
//! concierge can speak from what happened ("your coding lane failed 4 of
//! the last 9 calls on model X") instead of from a static catalog.
//!
//! Pure over its inputs (the caller supplies the ledger slice via
//! `read_ledger`), so it's fully unit-testable without disk.
use std::collections::{BTreeMap, BTreeSet};
use serde::Serialize;
use crate::intent::{TaskHint, UseCase};
use crate::outcome::{InferenceTask, OutcomeLedgerEntry};
/// Resolve which concierge use-case lane a recorded inference task
/// counts toward.
///
/// Ledger receipts are keyed by `InferenceTask`, while the recommender
/// ranks inside `UseCase` lanes; this function bridges the two so
/// receipts can be aggregated per lane.
pub fn use_case_for_task(task: InferenceTask) -> UseCase {
    match task {
        // From the lane's point of view both generative variants are
        // plain assistant chat.
        InferenceTask::Reasoning | InferenceTask::Generate => UseCase::Assistant,
        InferenceTask::Embed => UseCase::Search,
        InferenceTask::Classify => UseCase::Summarize,
        InferenceTask::Code => UseCase::Coding,
    }
}
/// Resolve which concierge use-case lane a request's inference-time
/// `TaskHint` points at, i.e. the lane whose default model should serve
/// the request.
///
/// This is the request-side counterpart of `use_case_for_task`, which
/// performs the same mapping for the recorded `InferenceTask`.
pub fn use_case_for_task_hint(hint: TaskHint) -> UseCase {
    match hint {
        // Chat and Reasoning both land in the general assistant lane.
        TaskHint::Reasoning | TaskHint::Chat => UseCase::Assistant,
        TaskHint::Classify => UseCase::Summarize,
        TaskHint::Code => UseCase::Coding,
    }
}
/// Activity and friction tallies for a single use-case lane.
#[derive(Debug, Clone, Serialize)]
pub struct LaneUsage {
    /// The lane these tallies describe.
    pub use_case: UseCase,
    /// Every receipt counted for this lane inside the window, whether
    /// it was resolved or left inconclusive.
    pub calls: u64,
    /// Receipts resolved as a success.
    pub successes: u64,
    /// Receipts resolved as a failure.
    pub failures: u64,
    /// Receipts that completed but never received a follow-up signal.
    /// Kept separately so the resolution rate stays visible and the
    /// stats are not read as though every call had been judged.
    pub inconclusive: u64,
    /// Sum of the latencies of all counted receipts, in milliseconds.
    pub total_latency_ms: u64,
    /// Mean quality across the receipts that carried a quality signal;
    /// stays at `0.0` when none of them did.
    pub avg_quality: f64,
    /// Every distinct model seen serving this lane.
    pub models_used: BTreeSet<String>,
    /// Models with at least one failure in this lane. This is the
    /// friction signal suggestions are built on.
    pub failing_models: BTreeSet<String>,
}
impl LaneUsage {
    /// Start an empty tally for `use_case`: all counters at zero, no
    /// models recorded, and `avg_quality` at `0.0`.
    fn new(use_case: UseCase) -> Self {
        Self {
            use_case,
            models_used: BTreeSet::new(),
            failing_models: BTreeSet::new(),
            avg_quality: 0.0,
            total_latency_ms: 0,
            calls: 0,
            successes: 0,
            failures: 0,
            inconclusive: 0,
        }
    }

    /// Share of *resolved* receipts (successes + failures) that were
    /// successes.
    ///
    /// Returns `None` when the lane has no resolved receipts, so that
    /// quality is never inferred from inconclusive-only data.
    pub fn success_rate(&self) -> Option<f64> {
        match self.successes + self.failures {
            0 => None,
            judged => Some(self.successes as f64 / judged as f64),
        }
    }

    /// Share of all calls that received a resolution signal. Read it
    /// alongside `success_rate` to gauge how much that figure can be
    /// trusted. Returns `0.0` for a lane with no calls.
    pub fn resolution_rate(&self) -> f64 {
        if self.calls == 0 {
            return 0.0;
        }
        let judged = self.successes + self.failures;
        judged as f64 / self.calls as f64
    }

    /// Mean latency in milliseconds over every call in the lane, or
    /// `0.0` when the lane has no calls.
    pub fn avg_latency_ms(&self) -> f64 {
        match self.calls {
            0 => 0.0,
            count => self.total_latency_ms as f64 / count as f64,
        }
    }
}
/// Ledger receipts rolled up into per-lane usage over a recency window.
#[derive(Debug, Clone, Serialize)]
pub struct UsageProfile {
    /// One tally per lane that had at least one receipt in the window.
    pub lanes: BTreeMap<UseCase, LaneUsage>,
    /// Number of receipts counted across all lanes.
    pub total_calls: u64,
    /// Length, in seconds, of the recency window the profile was built
    /// over.
    pub window_secs: u64,
}
impl UsageProfile {
    /// Roll ledger receipts up into per-lane usage.
    ///
    /// Only receipts whose `timestamp` is at or after
    /// `now - window_secs` (saturating at zero) are counted; the lower
    /// bound is inclusive and there is no upper bound. Passing
    /// `window_secs == 0` disables the filter and counts all history.
    /// `now` is expected to be in the same unit as the receipts'
    /// `timestamp`.
    ///
    /// The returned profile records `window_secs` unchanged.
    pub fn from_ledger(entries: &[OutcomeLedgerEntry], now: u64, window_secs: u64) -> Self {
        let oldest_allowed = match window_secs {
            0 => 0,
            span => now.saturating_sub(span),
        };

        let mut lanes: BTreeMap<UseCase, LaneUsage> = BTreeMap::new();
        // Per lane: (sum of quality scores, number of receipts that had one).
        let mut quality_totals: BTreeMap<UseCase, (f64, u64)> = BTreeMap::new();
        let mut total_calls: u64 = 0;

        for receipt in entries {
            if receipt.timestamp < oldest_allowed {
                continue;
            }

            let use_case = use_case_for_task(receipt.task);
            let usage = lanes
                .entry(use_case)
                .or_insert_with(|| LaneUsage::new(use_case));

            total_calls += 1;
            usage.calls += 1;
            usage.total_latency_ms += receipt.latency_ms;
            usage.models_used.insert(receipt.model_id.clone());

            match receipt.success {
                None => usage.inconclusive += 1,
                Some(true) => usage.successes += 1,
                Some(false) => {
                    usage.failures += 1;
                    // Remember which model caused the friction.
                    usage.failing_models.insert(receipt.model_id.clone());
                }
            }

            if let Some(score) = receipt.quality {
                let (sum, seen) = quality_totals.entry(use_case).or_insert((0.0, 0));
                *sum += score;
                *seen += 1;
            }
        }

        // Turn the running sums into means; lanes with no quality signal
        // keep the initial `avg_quality`.
        for (use_case, (sum, seen)) in quality_totals {
            if seen == 0 {
                continue;
            }
            if let Some(usage) = lanes.get_mut(&use_case) {
                usage.avg_quality = sum / seen as f64;
            }
        }

        Self {
            lanes,
            total_calls,
            window_secs,
        }
    }

    /// Look up the tally for `use_case`; `None` when no counted receipt
    /// fell into that lane.
    pub fn lane(&self, use_case: UseCase) -> Option<&LaneUsage> {
        self.lanes.get(&use_case)
    }

    /// The lanes the user actually exercised, ordered by call count with
    /// the busiest first. These are the lanes the concierge should
    /// reason about, as opposed to lanes that were never touched. Lanes
    /// with equal call counts keep the map's key order, since the sort
    /// is stable.
    pub fn active_lanes(&self) -> Vec<&LaneUsage> {
        let mut ranked: Vec<&LaneUsage> = self.lanes.values().collect();
        ranked.sort_by_key(|usage| std::cmp::Reverse(usage.calls));
        ranked
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a ledger receipt from the fields these tests vary; every
    /// other field gets a fixed placeholder value.
    fn receipt(
        model: &str,
        task: InferenceTask,
        success: Option<bool>,
        quality: Option<f64>,
        latency: u64,
        ts: u64,
    ) -> OutcomeLedgerEntry {
        let trace_id = format!("t-{ts}-{model}");
        OutcomeLedgerEntry {
            trace_id,
            model_id: model.to_string(),
            task,
            routing_reason: "test".into(),
            latency_ms: latency,
            input_tokens: 10,
            output_tokens: 20,
            success,
            quality,
            error: None,
            project_id: None,
            intent: None,
            timestamp: ts,
        }
    }

    #[test]
    fn aggregates_lanes_and_friction() {
        // Coding lane: two successes on "big-coder" and one failure on
        // "small-coder". Assistant lane: one receipt that was never judged.
        let ledger = [
            receipt("big-coder", InferenceTask::Code, Some(true), Some(0.9), 1000, 100),
            receipt("big-coder", InferenceTask::Code, Some(true), Some(0.8), 1100, 110),
            receipt("small-coder", InferenceTask::Code, Some(false), None, 200, 120),
            receipt("chat", InferenceTask::Generate, None, None, 300, 130),
        ];

        // A zero window means all history is counted.
        let profile = UsageProfile::from_ledger(&ledger, 200, 0);
        assert_eq!(profile.total_calls, 4);

        let coding = profile.lane(UseCase::Coding).unwrap();
        assert_eq!(coding.calls, 3);
        assert_eq!(coding.successes, 2);
        assert_eq!(coding.failures, 1);
        assert_eq!(coding.success_rate(), Some(2.0 / 3.0));
        assert!(coding.failing_models.contains("small-coder"));
        assert!(!coding.failing_models.contains("big-coder"));
        assert!((coding.avg_quality - 0.85).abs() < 1e-9);

        let assistant = profile.lane(UseCase::Assistant).unwrap();
        assert_eq!(assistant.inconclusive, 1);
        // Nothing in this lane was resolved.
        assert_eq!(assistant.success_rate(), None);
        assert!((assistant.resolution_rate() - 0.0).abs() < 1e-9);

        // The lane with the most calls is listed first.
        let ranked = profile.active_lanes();
        assert_eq!(ranked[0].use_case, UseCase::Coding);
    }

    #[test]
    fn window_filters_old_receipts() {
        let ledger = [
            receipt("m", InferenceTask::Generate, Some(true), Some(1.0), 1, 10),
            receipt("m", InferenceTask::Generate, Some(true), Some(1.0), 1, 1000),
        ];

        // With now=1000 and a 100-second window the cutoff is 900, so
        // only the ts=1000 receipt is counted.
        let profile = UsageProfile::from_ledger(&ledger, 1000, 100);
        assert_eq!(profile.total_calls, 1);
    }
}