use serde::{Deserialize, Serialize};
use crate::hardware::HardwareInfo;
use crate::intent::{Privacy, QualityTier, UseCase};
use crate::nudge::NudgeState;
use crate::recommend::{recommend, FitStatus, Recommendation};
use crate::schema::ModelSchema;
use crate::update_prefs::{UpdatePolicy, UpdatePreferences};
use crate::usage_profile::{LaneUsage, UsageProfile};
/// Mode reported on a [`ConciergeDecision`]; serialized in `snake_case`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum ConciergeMode {
/// Nothing to surface; this is the mode set by `ConciergeDecision::observe`.
Observe,
/// NOTE(review): not produced by the code visible in this file — confirm intended meaning.
Answer,
/// NOTE(review): not produced by the code visible in this file — confirm intended meaning.
Ask,
/// A concrete suggestion is attached; the mode `evaluate_concierge` returns when it finds one.
Act,
}
/// Outcome of a concierge evaluation: a mode plus the evidence that supports it.
#[derive(Debug, Clone, Serialize)]
pub struct ConciergeDecision {
/// What kind of decision was reached.
pub mode: ConciergeMode,
/// Confidence in the decision; `evaluate_concierge` fills it with a value clamped to `0.0..=1.0`.
pub confidence: f64,
/// Human-readable lines explaining why the decision was made.
pub evidence: Vec<String>,
/// Suggested model, if any; left out of the serialized form when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub suggestion: Option<ConciergeSuggestion>,
}
impl ConciergeDecision {
pub fn observe() -> Self {
Self {
mode: ConciergeMode::Observe,
confidence: 0.0,
evidence: Vec::new(),
suggestion: None,
}
}
}
/// Why a suggestion was dismissed; serialized in `snake_case`.
///
/// `is_permanent` treats every variant except `NotNow` as permanent.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum DismissReason {
/// Soft dismissal: the only reason that is not permanent.
NotNow,
/// Permanent dismissal.
Wrong,
/// Permanent dismissal.
TooExpensive,
/// Permanent dismissal.
Privacy,
/// Permanent dismissal.
NeverForProject,
}
impl DismissReason {
    /// Returns `true` for every reason except [`DismissReason::NotNow`], i.e. for
    /// dismissals that should never expire.
    pub fn is_permanent(self) -> bool {
        match self {
            DismissReason::NotNow => false,
            DismissReason::Wrong
            | DismissReason::TooExpensive
            | DismissReason::Privacy
            | DismissReason::NeverForProject => true,
        }
    }
}
/// Result of `canary_verdict`; serialized in `snake_case`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum CanaryVerdict {
/// The new success rate is not worse than the baseline by more than the margin.
Keep,
/// The new success rate plus the margin is still below the baseline.
Revert,
/// No success rate is available, or too few samples were collected.
Insufficient,
}
/// Compares a candidate's observed success rate against a baseline and decides
/// whether to keep it, revert it, or wait for more data.
///
/// * `new_success_rate` — observed success rate of the candidate, if any.
/// * `new_samples` — number of samples behind `new_success_rate`.
/// * `baseline_success_rate` — success rate the candidate is compared against.
/// * `min_samples` — minimum `new_samples` required before a verdict is given.
/// * `margin` — tolerated drop below the baseline before reverting.
///
/// Returns [`CanaryVerdict::Insufficient`] when there is no rate, the rate is not a
/// finite number, or fewer than `min_samples` samples were collected. Otherwise
/// returns [`CanaryVerdict::Revert`] when `rate + margin < baseline_success_rate`
/// and [`CanaryVerdict::Keep`] in every other case.
pub fn canary_verdict(
    new_success_rate: Option<f64>,
    new_samples: u64,
    baseline_success_rate: f64,
    min_samples: u64,
    margin: f64,
) -> CanaryVerdict {
    let Some(rate) = new_success_rate else {
        return CanaryVerdict::Insufficient;
    };
    // A NaN rate makes every comparison false, which would otherwise fall through
    // to `Keep`; a non-finite rate carries no evidence either way.
    if new_samples < min_samples || !rate.is_finite() {
        return CanaryVerdict::Insufficient;
    }
    if rate + margin < baseline_success_rate {
        CanaryVerdict::Revert
    } else {
        CanaryVerdict::Keep
    }
}
/// Sample-count threshold for canary checks.
/// NOTE(review): presumably passed as `min_samples` to `canary_verdict` — confirm at call sites.
pub const CANARY_MIN_SAMPLES: u64 = 5;
/// Regression tolerance for canary checks.
/// NOTE(review): presumably passed as `margin` to `canary_verdict` — confirm at call sites.
pub const CANARY_REGRESSION_MARGIN: f64 = 0.1;
/// Per-model health summary, listed in `ConciergeStatus::models`.
#[derive(Debug, Clone, Serialize)]
pub struct ModelHealth {
/// Identifier of the model this entry describes.
pub model_id: String,
/// Number of calls recorded for the model.
pub calls: u64,
/// Success rate, if one is available.
/// NOTE(review): presumably `None` when no call has a resolved outcome — confirm.
pub success_rate: Option<f64>,
/// Average latency, in milliseconds per the field name.
pub avg_latency_ms: f64,
/// Quality score. NOTE(review): scale and source are not visible here — confirm.
pub quality: f64,
/// Whether the model is excluded. NOTE(review): excluded from what is set by the producer — confirm.
pub excluded: bool,
}
/// A model choice still awaiting enough samples, listed in `ConciergeStatus::pending_verification`.
#[derive(Debug, Clone, Serialize)]
pub struct PendingVerification {
/// Use case the model was set for.
pub use_case: UseCase,
/// Identifier of the model under verification.
pub model_id: String,
/// When the model was set. NOTE(review): presumably Unix seconds like the other timestamps here — confirm.
pub set_at: u64,
/// Samples resolved so far.
pub resolved_samples: u64,
/// Samples needed. NOTE(review): presumably the target `resolved_samples` must reach — confirm.
pub needed: u64,
}
/// Serializable snapshot combining lane usage, the current decision and model health.
#[derive(Debug, Clone, Serialize)]
pub struct ConciergeStatus {
/// Usage per lane.
pub lanes: Vec<LaneUsage>,
/// The current concierge decision.
pub decision: ConciergeDecision,
/// Health summary per model.
pub models: Vec<ModelHealth>,
/// Model choices still being verified.
// NOTE(review): `#[serde(default)]` only affects deserialization, and this struct derives
// only `Serialize` here, so the attribute appears to have no effect — confirm.
#[serde(default)]
pub pending_verification: Vec<PendingVerification>,
}
/// A recorded dismissal of one suggestion, together with the reason and time.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct DismissalRecord {
/// Dismiss key of the suggestion; `is_dismissed` compares it for equality.
pub key: String,
/// Why it was dismissed; decides whether the dismissal is permanent.
pub reason: DismissReason,
/// When it was dismissed, in the same seconds clock as the `now_secs` passed to `is_dismissed`.
pub timestamp: u64,
}
/// How long a non-permanent dismissal keeps suppressing a suggestion: 14 days, in seconds.
pub const SOFT_DISMISS_COOLDOWN_SECS: u64 = 14 * 24 * 60 * 60;
/// Minimum successes + failures a lane needs before `evaluate_concierge` diagnoses it.
const MIN_RESOLVED_SAMPLES: u64 = 5;
/// Lanes with a success rate at or above this value are not treated as struggling.
const FRICTION_SUCCESS_RATE: f64 = 0.6;
/// Diagnoses whose confidence falls below this value are skipped.
const MIN_DIAGNOSIS_CONFIDENCE: f64 = 0.25;
/// A concrete model suggestion for one use case.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ConciergeSuggestion {
/// Use case the suggestion is for.
pub use_case: UseCase,
/// Identifier of the suggested model.
pub model_id: String,
/// Name of the suggested model as shown to the user.
pub display_name: String,
/// Download size in megabytes, copied from the recommendation.
pub download_mb: u64,
/// User-facing text describing the suggestion.
pub message: String,
/// Key of the form `concierge:<use-case-slug>=><model_id>` used to look up dismissals.
pub dismiss_key: String,
}
/// Default throttle window between concierge prompts: 7 days, in seconds.
pub const DEFAULT_CONCIERGE_THROTTLE_SECS: u64 = 7 * 24 * 60 * 60;
/// Default use cases to watch: assistant and coding.
/// NOTE(review): presumably the default `use_cases` argument for `decide_concierge` — confirm at call sites.
pub const DEFAULT_WATCHED_USE_CASES: &[UseCase] = &[UseCase::Assistant, UseCase::Coding];
/// Maps a use case to the fixed lowercase slug embedded in dismiss keys.
///
/// The slugs are spelled out by hand so stored keys do not depend on the enum's
/// `Debug` output.
fn use_case_slug(use_case: UseCase) -> &'static str {
    match use_case {
        UseCase::Search => "search",
        UseCase::Transcription => "transcription",
        UseCase::Vision => "vision",
        UseCase::Summarize => "summarize",
        UseCase::Coding => "coding",
        UseCase::Assistant => "assistant",
    }
}
/// Builds the dismissal key for a (use case, model) pair, in the form
/// `concierge:<slug>=><model_id>`.
fn dismiss_key(use_case: UseCase, model_id: &str) -> String {
    let slug = use_case_slug(use_case);
    format!("concierge:{slug}=>{model_id}")
}
#[allow(clippy::too_many_arguments)]
pub fn decide_concierge(
models: &[&ModelSchema],
hw: &HardwareInfo,
use_cases: &[UseCase],
tier: QualityTier,
prefs: &UpdatePreferences,
state: &NudgeState,
now_secs: u64,
throttle_secs: u64,
inference_active: bool,
) -> Vec<ConciergeSuggestion> {
if inference_active || matches!(prefs.policy, UpdatePolicy::Off) {
return Vec::new();
}
let throttled = state.last_concierge_secs != 0
&& now_secs.saturating_sub(state.last_concierge_secs) < throttle_secs;
if throttled {
return Vec::new();
}
let mut out = Vec::new();
for &use_case in use_cases {
let set = recommend(models, hw, use_case, tier, Privacy::OnDevice);
if set.picks.iter().any(|p| p.already_installed) {
continue;
}
let pick = match set
.picks
.iter()
.find(|p| !p.already_installed && p.is_local && p.fit == FitStatus::Fits)
{
Some(p) => p,
None => continue,
};
let key = dismiss_key(use_case, &pick.model_id);
if state.dismissed.iter().any(|k| k == &key) {
continue;
}
out.push(ConciergeSuggestion {
use_case,
model_id: pick.model_id.clone(),
display_name: pick.display_name.clone(),
download_mb: pick.download_mb,
message: suggestion_message(use_case, pick),
dismiss_key: key,
});
}
out
}
/// Renders the user-facing install prompt for an unserved use case.
///
/// Download sizes of 1024 MB or more are shown in GB with one decimal; smaller
/// sizes are shown in whole MB.
fn suggestion_message(use_case: UseCase, pick: &Recommendation) -> String {
    let size = match pick.download_mb {
        mb if mb >= 1024 => format!("{:.1} GB", mb as f64 / 1024.0),
        mb => format!("{mb} MB"),
    };
    let purpose = match use_case {
        UseCase::Search => "semantic search",
        UseCase::Transcription => "transcription",
        UseCase::Vision => "understanding images",
        UseCase::Summarize => "summarizing",
        UseCase::Coding => "coding",
        UseCase::Assistant => "chat & general help",
    };
    let name = &pick.display_name;
    format!("You have no model for {purpose}. {name} fits your machine ({size}) — install it?")
}
/// Scans the active usage lanes for one that is failing often enough to justify
/// recommending a different model, and reports the first such finding.
///
/// Returns [`ConciergeDecision::observe`] when the update policy is `Off`, when
/// inference is active, when no lane qualifies, or when a qualifying suggestion
/// exists but the throttle window since `state.last_concierge_secs` has not yet
/// elapsed. Otherwise returns an `Act` decision carrying the diagnosis confidence,
/// one evidence line and the suggestion.
///
/// A lane qualifies when it has at least `MIN_RESOLVED_SAMPLES` resolved calls, a
/// success rate below `FRICTION_SUCCESS_RATE`, a diagnosis confidence of at least
/// `MIN_DIAGNOSIS_CONFIDENCE`, and a local, fitting, non-failing pick that scores
/// strictly higher than the best-scoring failing model and has not been dismissed.
#[allow(clippy::too_many_arguments)]
pub fn evaluate_concierge(
    models: &[&ModelSchema],
    hw: &HardwareInfo,
    usage: &UsageProfile,
    tier: QualityTier,
    prefs: &UpdatePreferences,
    state: &NudgeState,
    now_secs: u64,
    throttle_secs: u64,
    inference_active: bool,
) -> ConciergeDecision {
    if inference_active || matches!(prefs.policy, UpdatePolicy::Off) {
        return ConciergeDecision::observe();
    }

    let since_last_prompt = now_secs.saturating_sub(state.last_concierge_secs);
    let in_throttle_window = state.last_concierge_secs != 0 && since_last_prompt < throttle_secs;

    for lane in usage.active_lanes() {
        // Only diagnose lanes with enough resolved outcomes.
        let resolved_calls = lane.successes + lane.failures;
        if resolved_calls < MIN_RESOLVED_SAMPLES {
            continue;
        }
        let rate = match lane.success_rate() {
            Some(rate) => rate,
            None => continue,
        };
        if rate >= FRICTION_SUCCESS_RATE {
            continue;
        }
        let confidence = diagnosis_confidence(resolved_calls, rate);
        if confidence < MIN_DIAGNOSIS_CONFIDENCE {
            continue;
        }

        let ranked = recommend(models, hw, lane.use_case, tier, Privacy::OnDevice);

        // Best score among the models that are failing in this lane; stays at
        // negative infinity when none of them appear in the recommendations.
        let score_to_beat = ranked
            .picks
            .iter()
            .filter(|p| lane.failing_models.contains(&p.model_id))
            .map(|p| p.score)
            .fold(f32::NEG_INFINITY, |best, score| best.max(score));

        let candidate = ranked.picks.iter().find(|p| {
            !lane.failing_models.contains(&p.model_id) && p.is_local && p.fit == FitStatus::Fits
        });
        let pick = match candidate {
            Some(found) => found,
            None => continue,
        };

        // An installed model the lane already uses is not a new suggestion.
        let already_in_use = pick.already_installed && lane.models_used.contains(&pick.model_id);
        let outranks_failing = score_to_beat.is_finite() && pick.score > score_to_beat;
        if already_in_use || !outranks_failing {
            continue;
        }

        let key = dismiss_key(lane.use_case, &pick.model_id);
        if is_dismissed(state, &key, now_secs) {
            continue;
        }

        // Throttling applies only once a real suggestion would otherwise be made.
        if in_throttle_window {
            return ConciergeDecision::observe();
        }

        let label = lane_label(lane.use_case);
        let percent = rate * 100.0;
        let failing_note = if lane.failing_models.is_empty() {
            String::new()
        } else {
            let names = lane.failing_models.iter().cloned().collect::<Vec<_>>();
            format!("; failing on {}", names.join(", "))
        };
        let receipt = format!(
            "{} lane: {}/{} resolved calls succeeded ({:.0}%) across {} call(s){}",
            label, lane.successes, resolved_calls, percent, lane.calls, failing_note
        );
        let message = format!(
            "Your {} model is struggling ({:.0}% success). {} fits this machine and ranks higher for this task.",
            label, percent, pick.display_name
        );

        return ConciergeDecision {
            mode: ConciergeMode::Act,
            confidence,
            evidence: vec![receipt],
            suggestion: Some(ConciergeSuggestion {
                use_case: lane.use_case,
                model_id: pick.model_id.clone(),
                display_name: pick.display_name.clone(),
                download_mb: pick.download_mb,
                message,
                dismiss_key: key,
            }),
        };
    }

    ConciergeDecision::observe()
}
/// Reports whether the suggestion identified by `key` is currently suppressed.
///
/// A key is suppressed when it appears in the legacy `dismissed` list, or when a
/// matching record in `concierge_dismissals` is either permanent or younger than
/// `SOFT_DISMISS_COOLDOWN_SECS` relative to `now_secs`.
fn is_dismissed(state: &NudgeState, key: &str, now_secs: u64) -> bool {
    let legacy_hit = state.dismissed.iter().any(|k| k == key);
    if legacy_hit {
        return true;
    }
    for record in state.concierge_dismissals.iter() {
        if record.key != key {
            continue;
        }
        if record.reason.is_permanent() {
            return true;
        }
        let age_secs = now_secs.saturating_sub(record.timestamp);
        if age_secs < SOFT_DISMISS_COOLDOWN_SECS {
            return true;
        }
    }
    false
}
/// Scores how confident a friction diagnosis is, in `0.0..=1.0`.
///
/// The score is the failure share (`1 - success_rate`, clamped to `0.0..=1.0`)
/// scaled by `resolved / (resolved + 10)`, so few samples pull the confidence
/// towards zero.
fn diagnosis_confidence(resolved: u64, success_rate: f64) -> f64 {
    let samples = resolved as f64;
    let sample_factor = samples / (samples + 10.0);
    let severity = (1.0 - success_rate).clamp(0.0, 1.0);
    (severity * sample_factor).clamp(0.0, 1.0)
}
/// Human-readable lane name used in evidence lines and suggestion messages.
fn lane_label(use_case: UseCase) -> &'static str {
    match use_case {
        UseCase::Search => "search",
        UseCase::Transcription => "transcription",
        UseCase::Vision => "vision",
        UseCase::Summarize => "summarization",
        UseCase::Coding => "coding",
        UseCase::Assistant => "assistant",
    }
}
// Unit tests for `decide_concierge` (install suggestions for unserved lanes) and
// for the `dismiss_key` format.
#[cfg(test)]
mod tests {
use super::*;
use crate::hardware::GpuBackend;
use crate::schema::{
CostModel, ModelCapability, ModelSchema, ModelSource, PerformanceEnvelope, TrustTier,
};
/// Builds a local model fixture. `installed` is stored in `available`, and
/// `download_mb` is used for both `size_mb` and `ram_mb`.
fn model(id: &str, installed: bool, download_mb: u64, caps: &[ModelCapability]) -> ModelSchema {
ModelSchema {
id: id.into(),
name: id.into(),
provider: "qwen".into(),
family: "qwen3".into(),
version: String::new(),
capabilities: caps.to_vec(),
context_length: 32768,
max_output_tokens: None,
param_count: "4B".into(),
quantization: Some("Q4_K_M".into()),
performance: PerformanceEnvelope::default(),
cost: CostModel {
size_mb: Some(download_mb),
ram_mb: Some(download_mb),
..Default::default()
},
source: ModelSource::Local {
hf_repo: "x/y".into(),
hf_filename: "m.gguf".into(),
tokenizer_repo: "x/y".into(),
},
tags: vec![],
supported_params: vec![],
public_benchmarks: vec![],
trust_tier: TrustTier::Curated,
deprecated: false,
available: installed,
}
}
/// Default update preferences with only the policy overridden.
fn prefs(policy: UpdatePolicy) -> UpdatePreferences {
UpdatePreferences {
policy,
..Default::default()
}
}
/// Hardware fixture: 8 cores, 16 384 MB RAM, Metal backend, no GPU memory figure.
fn hw() -> HardwareInfo {
HardwareInfo {
os: "test".into(),
arch: "test".into(),
cpu_cores: 8,
total_ram_mb: 16_384,
gpu_backend: GpuBackend::Metal,
gpu_memory_mb: None,
gpu_devices: vec![],
recommended_model: String::new(),
recommended_context: 4096,
max_model_mb: 0,
}
}
/// With the policy set to `Off`, no suggestion is produced.
#[test]
fn off_policy_suggests_nothing() {
let m = [model("chat-a", false, 2000, &[ModelCapability::Generate])];
let refs: Vec<&ModelSchema> = m.iter().collect();
let out = decide_concierge(
&refs,
&hw(),
&[UseCase::Assistant],
QualityTier::Balanced,
&prefs(UpdatePolicy::Off),
&NudgeState::default(),
100,
10,
false,
);
assert!(out.is_empty());
}
/// While inference is active (`inference_active = true`), no suggestion is produced.
#[test]
fn active_inference_defers() {
let m = [model("chat-a", false, 2000, &[ModelCapability::Generate])];
let refs: Vec<&ModelSchema> = m.iter().collect();
let out = decide_concierge(
&refs,
&hw(),
&[UseCase::Assistant],
QualityTier::Balanced,
&prefs(UpdatePolicy::Notify),
&NudgeState::default(),
100,
10,
true,
);
assert!(out.is_empty());
}
/// A use case with no installed model gets exactly one install suggestion.
#[test]
fn suggests_when_lane_unserved() {
let m = [model("chat-a", false, 2000, &[ModelCapability::Generate])];
let refs: Vec<&ModelSchema> = m.iter().collect();
let out = decide_concierge(
&refs,
&hw(),
&[UseCase::Assistant],
QualityTier::Balanced,
&prefs(UpdatePolicy::Notify),
&NudgeState::default(),
100,
10,
false,
);
assert_eq!(out.len(), 1);
assert_eq!(out[0].use_case, UseCase::Assistant);
assert_eq!(out[0].model_id, "chat-a");
assert!(out[0].message.contains("install it?"));
}
/// When the only model is already installed, nothing is suggested.
#[test]
fn served_lane_suggests_nothing() {
let m = [model("chat-a", true, 2000, &[ModelCapability::Generate])];
let refs: Vec<&ModelSchema> = m.iter().collect();
let out = decide_concierge(
&refs,
&hw(),
&[UseCase::Assistant],
QualityTier::Balanced,
&prefs(UpdatePolicy::Notify),
&NudgeState::default(),
100,
10,
false,
);
assert!(out.is_empty());
}
/// A suggestion whose key was passed to `NudgeState::dismiss` is not offered again.
#[test]
fn dismissed_suggestion_not_repeated() {
let m = [model("chat-a", false, 2000, &[ModelCapability::Generate])];
let refs: Vec<&ModelSchema> = m.iter().collect();
let mut state = NudgeState::default();
state.dismiss(&dismiss_key(UseCase::Assistant, "chat-a"));
let out = decide_concierge(
&refs,
&hw(),
&[UseCase::Assistant],
QualityTier::Balanced,
&prefs(UpdatePolicy::Notify),
&state,
100,
10,
false,
);
assert!(out.is_empty());
}
/// A prompt 5 seconds ago (95 vs. now 100) is inside the 10-second throttle window.
#[test]
fn throttled_within_window() {
let m = [model("chat-a", false, 2000, &[ModelCapability::Generate])];
let refs: Vec<&ModelSchema> = m.iter().collect();
let state = NudgeState {
last_concierge_secs: 95,
..Default::default()
};
let out = decide_concierge(
&refs,
&hw(),
&[UseCase::Assistant],
QualityTier::Balanced,
&prefs(UpdatePolicy::Notify),
&state,
100,
10,
false,
);
assert!(out.is_empty());
}
/// Only `last_concierge_secs` throttles the concierge; a recent `last_nudge_secs` does not.
#[test]
fn not_throttled_by_upgrade_nudge_field() {
let m = [model("chat-a", false, 2000, &[ModelCapability::Generate])];
let refs: Vec<&ModelSchema> = m.iter().collect();
let state = NudgeState {
last_nudge_secs: 99, last_concierge_secs: 0, ..Default::default()
};
let out = decide_concierge(
&refs,
&hw(),
&[UseCase::Assistant],
QualityTier::Balanced,
&prefs(UpdatePolicy::Notify),
&state,
100,
10,
false,
);
assert_eq!(out.len(), 1, "concierge must fire regardless of the upgrade nudge's window");
}
/// Pins the exact dismiss-key format, which is built from the hand-written slug.
#[test]
fn dismiss_key_is_stable_slug_not_debug() {
assert_eq!(dismiss_key(UseCase::Assistant, "chat-a"), "concierge:assistant=>chat-a");
assert_eq!(dismiss_key(UseCase::Coding, "code-x"), "concierge:coding=>code-x");
}
}
// Unit tests for `evaluate_concierge` (friction diagnosis from usage outcomes) and
// for `canary_verdict`.
#[cfg(test)]
mod b3_tests {
use super::*;
use crate::hardware::GpuBackend;
use crate::outcome::{InferenceTask, OutcomeLedgerEntry};
use crate::schema::{
CostModel, ModelCapability, ModelSchema, ModelSource, PerformanceEnvelope, TrustTier,
};
/// Builds a local model fixture. `installed` is stored in `available`, and
/// `download_mb` is used for both `size_mb` and `ram_mb`.
fn model(id: &str, installed: bool, download_mb: u64, caps: &[ModelCapability]) -> ModelSchema {
ModelSchema {
id: id.into(),
name: id.into(),
provider: "qwen".into(),
family: "qwen3".into(),
version: String::new(),
capabilities: caps.to_vec(),
context_length: 32768,
max_output_tokens: None,
param_count: "4B".into(),
quantization: Some("Q4_K_M".into()),
performance: PerformanceEnvelope::default(),
cost: CostModel {
size_mb: Some(download_mb),
ram_mb: Some(download_mb),
..Default::default()
},
source: ModelSource::Local {
hf_repo: "x/y".into(),
hf_filename: "m.gguf".into(),
tokenizer_repo: "x/y".into(),
},
tags: vec![],
supported_params: vec![],
public_benchmarks: vec![],
trust_tier: TrustTier::Curated,
deprecated: false,
available: installed,
}
}
/// Hardware fixture: 8 cores, 16 384 MB RAM, Metal backend, no GPU memory figure.
fn hw() -> HardwareInfo {
HardwareInfo {
os: "test".into(),
arch: "test".into(),
cpu_cores: 8,
total_ram_mb: 16_384,
gpu_backend: GpuBackend::Metal,
gpu_memory_mb: None,
gpu_devices: vec![],
recommended_model: String::new(),
recommended_context: 4096,
max_model_mb: 0,
}
}
/// Default update preferences with only the policy overridden.
fn prefs(policy: UpdatePolicy) -> UpdatePreferences {
UpdatePreferences {
policy,
..Default::default()
}
}
/// Builds `successes + failures` code-task ledger entries for one model; the first
/// `successes` entries are marked successful and the rest failed.
fn coding_ledger(failing_model: &str, successes: usize, failures: usize) -> Vec<OutcomeLedgerEntry> {
(0..(successes + failures))
.map(|i| OutcomeLedgerEntry {
trace_id: format!("t{i}"),
model_id: failing_model.to_string(),
task: InferenceTask::Code,
routing_reason: "r".into(),
latency_ms: 100,
input_tokens: 1,
output_tokens: 1,
success: Some(i < successes),
quality: None,
error: None,
project_id: None,
intent: None,
timestamp: 100,
})
.collect()
}
/// Two code-capable fixtures: an installed 1B `small-coder` and a not-installed 7B `big-coder`.
fn coding_models() -> Vec<ModelSchema> {
let mut small = model(
"small-coder",
true,
800,
&[ModelCapability::Generate, ModelCapability::Code],
);
small.param_count = "1B".into();
let mut big = model(
"big-coder",
false,
5000,
&[ModelCapability::Generate, ModelCapability::Code],
);
big.param_count = "7B".into();
vec![small, big]
}
/// 3 successes and 5 failures on `small-coder` must yield an `Act` decision that
/// suggests a different model and carries confidence and evidence.
#[test]
fn friction_triggers_act_with_receipts() {
let m = coding_models();
let refs: Vec<&ModelSchema> = m.iter().collect();
let entries = coding_ledger("small-coder", 3, 5); let usage = UsageProfile::from_ledger(&entries, 100, 0);
let decision = evaluate_concierge(
&refs,
&hw(),
&usage,
QualityTier::Balanced,
&prefs(UpdatePolicy::Notify),
&NudgeState::default(),
1_000_000,
DEFAULT_CONCIERGE_THROTTLE_SECS,
false,
);
assert_eq!(decision.mode, ConciergeMode::Act);
let s = decision.suggestion.expect("a suggestion");
assert_eq!(s.use_case, UseCase::Coding);
assert_ne!(s.model_id, "small-coder", "must not suggest the failing model");
assert!(decision.confidence > 0.0);
assert!(!decision.evidence.is_empty());
}
/// Covers each `canary_verdict` outcome: revert on a clear drop, keep within the
/// margin or above baseline, insufficient with too few samples or no rate.
#[test]
fn canary_reverts_only_on_worse_with_samples() {
assert_eq!(
canary_verdict(Some(0.4), 6, 0.8, 5, 0.1),
CanaryVerdict::Revert
);
assert_eq!(
canary_verdict(Some(0.75), 6, 0.8, 5, 0.1),
CanaryVerdict::Keep
);
assert_eq!(
canary_verdict(Some(0.95), 6, 0.8, 5, 0.1),
CanaryVerdict::Keep
);
assert_eq!(
canary_verdict(Some(0.0), 2, 0.8, 5, 0.1),
CanaryVerdict::Insufficient
);
assert_eq!(
canary_verdict(None, 0, 0.8, 5, 0.1),
CanaryVerdict::Insufficient
);
}
/// 49 successes and 1 failure is well above the friction threshold, so the result is `Observe`.
#[test]
fn one_stray_failure_is_not_friction() {
let m = coding_models();
let refs: Vec<&ModelSchema> = m.iter().collect();
let entries = coding_ledger("small-coder", 49, 1);
let usage = UsageProfile::from_ledger(&entries, 100, 0);
let decision = evaluate_concierge(
&refs, &hw(), &usage, QualityTier::Balanced,
&prefs(UpdatePolicy::Notify), &NudgeState::default(),
1_000_000, DEFAULT_CONCIERGE_THROTTLE_SECS, false,
);
assert_eq!(decision.mode, ConciergeMode::Observe);
}
/// A lane with 8 successes and no failures produces `Observe`.
#[test]
fn healthy_lane_stays_silent() {
let m = coding_models();
let refs: Vec<&ModelSchema> = m.iter().collect();
let entries = coding_ledger("small-coder", 8, 0); let usage = UsageProfile::from_ledger(&entries, 100, 0);
let decision = evaluate_concierge(
&refs, &hw(), &usage, QualityTier::Balanced,
&prefs(UpdatePolicy::Notify), &NudgeState::default(),
1_000_000, DEFAULT_CONCIERGE_THROTTLE_SECS, false,
);
assert_eq!(decision.mode, ConciergeMode::Observe);
}
/// A struggling lane (2 successes, 6 failures) still yields `Observe` when the last
/// concierge prompt was only 10 seconds ago.
#[test]
fn throttled_diagnosis_downgrades_to_observe() {
let m = coding_models();
let refs: Vec<&ModelSchema> = m.iter().collect();
let entries = coding_ledger("small-coder", 2, 6);
let usage = UsageProfile::from_ledger(&entries, 100, 0);
let now = 1_000_000;
let state = NudgeState {
last_concierge_secs: now - 10, ..Default::default()
};
let decision = evaluate_concierge(
&refs, &hw(), &usage, QualityTier::Balanced,
&prefs(UpdatePolicy::Notify), &state, now,
DEFAULT_CONCIERGE_THROTTLE_SECS, false,
);
assert_eq!(decision.mode, ConciergeMode::Observe);
}
/// A `Wrong` dismissal recorded for the `big-coder` key suppresses the suggestion
/// even though it was recorded at timestamp 0.
#[test]
fn permanent_dismissal_suppresses() {
let m = coding_models();
let refs: Vec<&ModelSchema> = m.iter().collect();
let entries = coding_ledger("small-coder", 2, 6);
let usage = UsageProfile::from_ledger(&entries, 100, 0);
let state = NudgeState {
concierge_dismissals: vec![DismissalRecord {
key: dismiss_key(UseCase::Coding, "big-coder"),
reason: DismissReason::Wrong,
timestamp: 0,
}],
..Default::default()
};
let decision = evaluate_concierge(
&refs, &hw(), &usage, QualityTier::Balanced,
&prefs(UpdatePolicy::Notify), &state, 1_000_000,
DEFAULT_CONCIERGE_THROTTLE_SECS, false,
);
assert_eq!(decision.mode, ConciergeMode::Observe);
}
/// With the policy set to `Off`, a struggling lane still yields `Observe`.
#[test]
fn policy_off_is_silent() {
let m = coding_models();
let refs: Vec<&ModelSchema> = m.iter().collect();
let entries = coding_ledger("small-coder", 2, 6);
let usage = UsageProfile::from_ledger(&entries, 100, 0);
let decision = evaluate_concierge(
&refs, &hw(), &usage, QualityTier::Balanced,
&prefs(UpdatePolicy::Off), &NudgeState::default(),
1_000_000, DEFAULT_CONCIERGE_THROTTLE_SECS, false,
);
assert_eq!(decision.mode, ConciergeMode::Observe);
}
}