use serde::{Deserialize, Serialize};
use crate::hardware::{HardwareInfo, SupportedAcceleration};
use crate::intent::{Privacy, QualityTier, UseCase, UseCaseRole};
use crate::schema::{ModelSchema, TrustTier};
/// Result of checking whether a model can run within this host's memory budget.
///
/// Produced by `fit_status`; variant names are serialized in `snake_case`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FitStatus {
/// The estimated memory need plus the safety margin is within the budget.
/// Also reported unconditionally when `is_foundation_models()` is true.
Fits,
/// The estimated memory need plus the safety margin exceeds the budget.
TooBig,
/// `is_vllm_mlx()`, `is_remote()` or `is_delegated()` is true, so no local
/// memory check is performed.
ServerProvided,
/// Both the download size and the RAM figure are zero, so fit cannot be judged.
Unknown,
}
/// One scored catalog entry, as assembled by `build_recommendation`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Recommendation {
/// Catalog id of the model (copied from the schema's `id`).
pub model_id: String,
/// Human-readable model name (copied from the schema's `name`).
pub display_name: String,
/// Role derived from the requested use case via `UseCase::role`.
pub role: UseCaseRole,
/// Plain-language explanation produced by `rationale`.
pub rationale: String,
/// Download size in MB as reported by the schema's `size_mb()`.
pub download_mb: u64,
/// Mirrors the schema's `available` flag.
pub already_installed: bool,
/// Memory-fit verdict for this host.
pub fit: FitStatus,
/// Acceleration detected on the host (same value for every entry in a set).
pub acceleration: SupportedAcceleration,
/// Whether the schema reports the model as local (`is_local()`).
pub is_local: bool,
/// Always the negation of `is_local`.
pub requires_cloud_consent: bool,
/// Trust tier copied from the schema.
pub trust_tier: TrustTier,
/// Ranking score; higher sorts first.
pub score: f32,
}
/// MB subtracted from unified memory / system RAM when `memory_budget_mb`
/// computes the budget for Apple and CPU-style hosts.
const OS_RESERVE_MB: u64 = 3072;
/// Backend overhead in MB charged by `backend_overhead_mb` on Apple acceleration.
const OVERHEAD_METAL_MB: u64 = 512;
/// Backend overhead in MB charged by `backend_overhead_mb` on CUDA acceleration.
const OVERHEAD_CUDA_MB: u64 = 512;
/// Backend overhead in MB charged by `backend_overhead_mb` on every other host.
const OVERHEAD_CPU_MB: u64 = 1024;
/// Extra headroom in MB that `fit_status` adds to the requirement before
/// comparing it with the budget.
const SAFETY_MARGIN_MB: u64 = 1024;
/// Context length (tokens) that `memory_required_mb` passes to `kv_cache_mb`.
const FIT_CONTEXT_TOKENS: usize = 8192;
/// Output of `recommend`: ranked candidates plus an optional explanation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecommendationSet {
/// Eligible models whose fit is anything other than `FitStatus::TooBig`,
/// best first.
pub picks: Vec<Recommendation>,
/// Eligible models whose fit is `FitStatus::TooBig`, in the same ordering.
pub not_enough_memory: Vec<Recommendation>,
/// Explanation from `explain_if_needed`; present when `picks` is empty or
/// the top pick requires cloud consent.
pub note: Option<String>,
}
/// Builds the ranked recommendation set for one use case on this machine.
///
/// Catalog entries rejected by `passes_base_filter` are dropped. The rest are
/// scored by `build_recommendation` and split in two: entries whose fit is not
/// `FitStatus::TooBig` go to `picks`, the others to `not_enough_memory`.
///
/// Both lists are ordered by descending score, then already-installed first,
/// then smaller download, then `model_id`. A NaN score (only possible with
/// malformed catalog data) is ranked last so the comparator stays a total
/// order and the result is deterministic.
///
/// `note` carries the explanation from `explain_if_needed`, if any.
pub fn recommend(
    models: &[&ModelSchema],
    hw: &HardwareInfo,
    use_case: UseCase,
    tier: QualityTier,
    privacy: Privacy,
) -> RecommendationSet {
    // Sort key for a score: NaN compares as "unordered", which would make the
    // comparator inconsistent, so it is pinned below every real score.
    fn rank_key(score: f32) -> f32 {
        if score.is_nan() {
            f32::NEG_INFINITY
        } else {
            score
        }
    }

    // Best-first ordering with deterministic tie-breaks.
    fn by_rank(a: &Recommendation, b: &Recommendation) -> std::cmp::Ordering {
        rank_key(b.score)
            .partial_cmp(&rank_key(a.score))
            // Unreachable after `rank_key`, kept so no panic path exists.
            .unwrap_or(std::cmp::Ordering::Equal)
            .then_with(|| b.already_installed.cmp(&a.already_installed))
            .then_with(|| a.download_mb.cmp(&b.download_mb))
            .then_with(|| a.model_id.cmp(&b.model_id))
    }

    let accel = hw.supported_acceleration();

    let (mut picks, mut not_enough_memory): (Vec<_>, Vec<_>) = models
        .iter()
        .filter(|m| passes_base_filter(m, hw, use_case, privacy))
        .map(|m| build_recommendation(m, hw, &accel, use_case, tier))
        .partition(|r| r.fit != FitStatus::TooBig);

    picks.sort_by(by_rank);
    not_enough_memory.sort_by(by_rank);

    let note = explain_if_needed(&picks, &not_enough_memory, hw, use_case, tier, privacy);

    RecommendationSet {
        picks,
        not_enough_memory,
        note,
    }
}
/// Returns `true` when a catalog entry is eligible to be scored at all.
///
/// An entry passes only if every condition holds, checked in this order:
/// 1. it is not deprecated;
/// 2. it has every capability the use case requires;
/// 3. under `Privacy::OnDevice`, it is a local model;
/// 4. if it requires Apple silicon, the host's acceleration is `Apple`.
fn passes_base_filter(
    m: &ModelSchema,
    hw: &HardwareInfo,
    use_case: UseCase,
    privacy: Privacy,
) -> bool {
    // A single short-circuiting chain: later checks run only when the
    // earlier ones have passed.
    !m.deprecated
        && use_case
            .required_capabilities()
            .iter()
            .all(|cap| m.has_capability(*cap))
        && (privacy != Privacy::OnDevice || m.is_local())
        && (!m.requires_apple_silicon()
            || matches!(
                hw.supported_acceleration(),
                SupportedAcceleration::Apple { .. }
            ))
}
/// Produces the optional user-facing note that accompanies a recommendation set.
///
/// * No picks, but some models were too big: a memory-focused message whose
///   wording depends on the privacy setting.
/// * No picks and nothing too big: a generic "no model available" message.
/// * Top pick needs cloud consent: a message naming the first local pick
///   (or "No local model" when there is none).
/// * Otherwise: `None`.
///
/// `tier` is accepted for signature parity but does not affect the wording.
fn explain_if_needed(
    picks: &[Recommendation],
    too_big: &[Recommendation],
    hw: &HardwareInfo,
    use_case: UseCase,
    tier: QualityTier,
    privacy: Privacy,
) -> Option<String> {
    // Deliberately unused; silences the unused-parameter warning.
    let _ = tier;
    let purpose = use_case_purpose(use_case);

    let best = match picks.first() {
        Some(best) => best,
        None => {
            if too_big.is_empty() {
                return Some(format!("No model available for {purpose} on this machine."));
            }
            let ram_gb = hw.total_ram_mb / 1024;
            let message = match privacy {
                Privacy::OnDevice => format!(
                    "No on-device model for {purpose} fits your {ram_gb} GB machine. \
                     Free up memory, pick a smaller tier, or allow cloud models."
                ),
                Privacy::CloudOk => format!(
                    "No local model for {purpose} fits your {ram_gb} GB machine, and no \
                     cloud model is configured. Add an API key or free up memory."
                ),
            };
            return Some(message);
        }
    };

    if !best.requires_cloud_consent {
        return None;
    }

    // Offer the highest-ranked local alternative by name, if one exists.
    let local_alternative = picks
        .iter()
        .find(|candidate| candidate.is_local)
        .map(|candidate| candidate.display_name.as_str())
        .unwrap_or("No local model");

    Some(format!(
        "The best {purpose} pick runs in the cloud and needs your OK before first use. \
         {} fits locally if you prefer on-device.",
        local_alternative
    ))
}
/// Maps a use case to the short phrase used in user-facing sentences
/// (for example "coding" or "semantic search").
fn use_case_purpose(use_case: UseCase) -> &'static str {
    let phrase: &'static str = match use_case {
        UseCase::Coding => "coding",
        UseCase::Search => "semantic search",
        UseCase::Summarize => "summarizing",
        UseCase::Transcription => "transcription",
        UseCase::Vision => "understanding images",
        UseCase::Assistant => "chat & general help",
    };
    phrase
}
/// Scores one eligible model and packages it as a `Recommendation`.
///
/// The base score is the tier-weighted sum of quality, latency and memory
/// headroom (`1.0 - memory_pressure`). Each preferred capability of the use
/// case that the model has adds a flat 0.05 on top.
fn build_recommendation(
    m: &ModelSchema,
    hw: &HardwareInfo,
    accel: &SupportedAcceleration,
    use_case: UseCase,
    tier: QualityTier,
) -> Recommendation {
    let fit = fit_status(m, hw);
    let quality = quality_score(m);
    let latency = latency_score(m, accel);
    let pressure = memory_pressure(m, hw);

    let weights = tier.weights();
    let base_score = weights.quality * quality
        + weights.latency * latency
        + weights.memory_pressure * (1.0 - pressure);

    // Count how many "nice to have" capabilities this model offers.
    let preferred_hits = use_case
        .preferred_capabilities()
        .iter()
        .filter(|cap| m.has_capability(**cap))
        .count();
    let score = base_score + 0.05 * preferred_hits as f32;

    let is_local = m.is_local();
    let explanation = rationale(m, hw, use_case, tier, fit, quality);

    Recommendation {
        model_id: m.id.clone(),
        display_name: m.name.clone(),
        role: use_case.role(),
        rationale: explanation,
        download_mb: m.size_mb(),
        already_installed: m.available,
        fit,
        acceleration: accel.clone(),
        is_local,
        // Anything that is not local needs explicit consent before use.
        requires_cloud_consent: !is_local,
        trust_tier: m.trust_tier,
        score,
    }
}
/// Estimates model quality in `[0.0, 1.0]`.
///
/// With published benchmarks, this is their mean score clamped to the unit
/// range. Without any, it falls back to a size-based curve `b / (b + 7)` over
/// the total parameter count in billions (floored at 0.1).
fn quality_score(m: &ModelSchema) -> f32 {
    let benchmarks = &m.public_benchmarks;
    if benchmarks.is_empty() {
        let billions = param_billions_total(m).max(0.1);
        return (billions / (billions + 7.0)).clamp(0.0, 1.0);
    }
    let total: f64 = benchmarks.iter().map(|entry| entry.score).sum();
    let mean = total / benchmarks.len() as f64;
    mean.clamp(0.0, 1.0) as f32
}
/// Estimates responsiveness in `[0.0, 1.0]`; higher means faster.
///
/// Uses the curve `8 / (b + 8)` over the active parameter count in billions
/// (floored at 0.1) and adds 0.1 when the host has Apple or CUDA acceleration.
fn latency_score(m: &ModelSchema, accel: &SupportedAcceleration) -> f32 {
    let active_billions = param_billions_active(m).max(0.1);
    let gpu_accelerated = matches!(
        accel,
        SupportedAcceleration::Apple { .. } | SupportedAcceleration::Cuda { .. }
    );
    let bonus: f32 = if gpu_accelerated { 0.1 } else { 0.0 };
    (8.0 / (active_billions + 8.0) + bonus).clamp(0.0, 1.0)
}
/// Ratio of the model's estimated memory need to the host budget, clamped to
/// `[0.0, 1.5]`. A zero budget yields `1.0` instead of dividing by zero.
fn memory_pressure(m: &ModelSchema, hw: &HardwareInfo) -> f32 {
    match memory_budget_mb(hw) {
        0 => 1.0,
        budget => (memory_required_mb(m, hw) as f32 / budget as f32).clamp(0.0, 1.5),
    }
}
/// Classifies whether a model can run within this host's memory budget.
///
/// Checks are applied in order: server-provided models skip the memory check,
/// foundation-models entries always fit, models with no size information are
/// `Unknown`, and everything else is compared (requirement plus
/// `SAFETY_MARGIN_MB`) against `memory_budget_mb`.
fn fit_status(m: &ModelSchema, hw: &HardwareInfo) -> FitStatus {
    let served_elsewhere = m.is_vllm_mlx() || m.is_remote() || m.is_delegated();
    if served_elsewhere {
        return FitStatus::ServerProvided;
    }
    if m.is_foundation_models() {
        return FitStatus::Fits;
    }

    let no_size_info = m.size_mb() == 0 && m.ram_mb() == 0;
    if no_size_info {
        return FitStatus::Unknown;
    }

    let needed_with_margin = memory_required_mb(m, hw) + SAFETY_MARGIN_MB;
    if needed_with_margin > memory_budget_mb(hw) {
        FitStatus::TooBig
    } else {
        FitStatus::Fits
    }
}
/// Estimated memory (MB) needed to run the model: the larger of its RAM and
/// download-size figures, plus the KV-cache estimate at `FIT_CONTEXT_TOKENS`,
/// plus the backend overhead for this host.
fn memory_required_mb(m: &ModelSchema, hw: &HardwareInfo) -> u64 {
    let ram = m.ram_mb();
    let download = m.size_mb();
    let resident_weights = if download > ram { download } else { ram };
    resident_weights + kv_cache_mb(m, FIT_CONTEXT_TOKENS) + backend_overhead_mb(hw)
}
/// Heuristic KV-cache size for `context_tokens` tokens, rounded up.
///
/// The per-1000-token cost is `0.12 x active parameters (billions)`, with a
/// floor of 0.05; callers add the result to other MB figures.
///
/// NOTE(review): for an 8B model at 8192 tokens this evaluates to about 8,
/// which looks small for a value in MB — confirm the constants' scale.
fn kv_cache_mb(m: &ModelSchema, context_tokens: usize) -> u64 {
    let active_billions = f64::from(param_billions_active(m));
    let cost_per_thousand = f64::max(active_billions * 0.12, 0.05);
    let thousands_of_tokens = context_tokens as f64 / 1000.0;
    (thousands_of_tokens * cost_per_thousand).ceil() as u64
}
/// Memory (MB) a model may occupy on this host.
///
/// * Apple: unified memory minus `OS_RESERVE_MB`.
/// * CUDA: the reported device memory, or total system RAM when unknown.
/// * Anything else: total system RAM minus `OS_RESERVE_MB`.
///
/// NOTE(review): the CUDA fallback uses total RAM without subtracting
/// `OS_RESERVE_MB`, unlike the CPU path — confirm that is intended.
fn memory_budget_mb(hw: &HardwareInfo) -> u64 {
    let system_ram = hw.total_ram_mb;
    match hw.supported_acceleration() {
        SupportedAcceleration::Apple {
            unified_memory_mb: unified,
        } => unified.saturating_sub(OS_RESERVE_MB),
        SupportedAcceleration::Cuda {
            device_memory_mb: vram,
        } => vram.unwrap_or(system_ram),
        SupportedAcceleration::UnsupportedDiscreteGpu { .. } | SupportedAcceleration::Cpu => {
            system_ram.saturating_sub(OS_RESERVE_MB)
        }
    }
}
/// Fixed runtime overhead (MB) charged for the host's inference backend:
/// Metal on Apple, CUDA on NVIDIA, and the CPU figure for everything else.
fn backend_overhead_mb(hw: &HardwareInfo) -> u64 {
    let acceleration = hw.supported_acceleration();
    match acceleration {
        SupportedAcceleration::Cuda { .. } => OVERHEAD_CUDA_MB,
        SupportedAcceleration::Apple { .. } => OVERHEAD_METAL_MB,
        SupportedAcceleration::UnsupportedDiscreteGpu { .. } | SupportedAcceleration::Cpu => {
            OVERHEAD_CPU_MB
        }
    }
}
/// Total parameter count in billions.
///
/// Prefers the number at the start of `param_count`. When that cannot be
/// parsed, estimates from the download size at 600 MB per billion (at least
/// 0.1), or returns 0.0 when the size is also unknown.
fn param_billions_total(m: &ModelSchema) -> f32 {
    parse_leading_billions(&m.param_count).unwrap_or_else(|| match m.size_mb() {
        0 => 0.0,
        size_mb => (size_mb as f32 / 600.0).max(0.1),
    })
}
/// Active parameter count in billions.
///
/// Reads a parenthesised annotation such as `"30B (3B active)"`: the text
/// between the first `(` and the following `active` is parsed as the count.
/// Falls back to the total parameter count when no such annotation parses.
fn param_billions_active(m: &ModelSchema) -> f32 {
    let annotated = match m.param_count.split_once('(') {
        Some((_, after_paren)) => after_paren
            .split_once("active")
            .and_then(|(count, _)| parse_leading_billions(count)),
        None => None,
    };
    annotated.unwrap_or_else(|| param_billions_total(m))
}
/// Parses the number at the start of `s` and returns it in billions.
///
/// Leading/trailing whitespace is ignored. The number is the longest prefix
/// made of ASCII digits and `.`. The first non-space character after it
/// selects the unit, case-insensitively:
///
/// * `m` — millions, divided by 1000 (`"600M"` → `0.6`);
/// * `t` — trillions, multiplied by 1000 (`"1T"` → `1000.0`);
/// * anything else (including `b` or nothing) — already billions.
///
/// Returns `None` when there is no parsable number, or when the value is not
/// finite (an absurdly long digit string parses to infinity, which would
/// otherwise turn downstream scores into NaN).
fn parse_leading_billions(s: &str) -> Option<f32> {
    let s = s.trim();

    // Digits and '.' are ASCII, so this byte offset is always a char boundary.
    let number_end = s
        .find(|c: char| !(c.is_ascii_digit() || c == '.'))
        .unwrap_or(s.len());
    let (number, unit) = s.split_at(number_end);

    let value: f32 = number.parse().ok()?;

    let billions = match unit.trim_start().chars().next() {
        Some(c) if c.eq_ignore_ascii_case(&'m') => value / 1000.0,
        Some(c) if c.eq_ignore_ascii_case(&'t') => value * 1000.0,
        _ => value,
    };

    if billions.is_finite() {
        Some(billions)
    } else {
        None
    }
}
/// Builds the one-line, plain-language explanation shown next to a model.
///
/// * Server-provided models get a "nothing to download" / "no local memory
///   needed" sentence, depending on whether the model is remote.
/// * Models that are too big for this host are described as needing more
///   memory than the machine has — they must not be described as fitting,
///   because they are surfaced in the `not_enough_memory` list.
/// * Everything else (`Fits`, `Unknown`) gets the tier-flavoured
///   "model that fits your machine" sentence.
///
/// `quality` at or above 0.7 adds the "high-quality" qualifier.
fn rationale(
    m: &ModelSchema,
    hw: &HardwareInfo,
    use_case: UseCase,
    tier: QualityTier,
    fit: FitStatus,
    quality: f32,
) -> String {
    let purpose = use_case_purpose(use_case);

    // Server-provided models never mention the local machine or download size.
    if fit == FitStatus::ServerProvided {
        return if m.is_remote() {
            format!(
                "{}: cloud model for {} — runs on Parslee's servers, nothing to download",
                m.name, purpose
            )
        } else {
            format!(
                "{}: served externally for {} — no local memory needed",
                m.name, purpose
            )
        };
    }

    let machine = match hw.supported_acceleration() {
        SupportedAcceleration::Apple { unified_memory_mb } => {
            format!("your {} GB Apple Silicon Mac (Metal)", unified_memory_mb / 1024)
        }
        SupportedAcceleration::Cuda { device_memory_mb } => match device_memory_mb {
            Some(mb) => format!("your {} GB NVIDIA GPU (CUDA)", mb / 1024),
            None => "your NVIDIA GPU (CUDA)".to_string(),
        },
        SupportedAcceleration::UnsupportedDiscreteGpu { .. } | SupportedAcceleration::Cpu => {
            format!("your {} GB machine (CPU)", hw.total_ram_mb / 1024)
        }
    };

    let quality_note = if quality >= 0.7 { "high-quality " } else { "" };

    // Show GB with one decimal from 1 GiB upwards, whole MB below that.
    let size = if m.size_mb() >= 1024 {
        format!("{:.1} GB download", m.size_mb() as f64 / 1024.0)
    } else {
        format!("{} MB download", m.size_mb())
    };

    // A too-big model must not claim to fit the machine.
    if fit == FitStatus::TooBig {
        return format!(
            "{}: {}{} model that needs more memory than {} has ({})",
            m.name, quality_note, purpose, machine, size
        );
    }

    let tier_word = match tier {
        QualityTier::Fastest => "fastest",
        QualityTier::Balanced => "best-balanced",
        QualityTier::MostCapable => "most capable",
    };

    format!(
        "{}: the {} {}{} model that fits {} ({})",
        m.name, tier_word, quality_note, purpose, machine, size
    )
}
// Unit tests for the recommender: ranking per tier, eligibility filters,
// memory-fit partitioning, explanatory notes, and the serialized shape.
#[cfg(test)]
mod tests {
use super::*;
use crate::hardware::{GpuBackend, GpuDevice, GpuVendor};
use crate::schema::{CostModel, ModelCapability, ModelSource, PerformanceEnvelope};
// Builds a synthetic 8-core host with the given GPU backend, system RAM (MB)
// and optional GPU memory (MB); no GPU devices are listed.
fn hw(accel_backend: GpuBackend, ram_mb: u64, gpu_mb: Option<u64>) -> HardwareInfo {
HardwareInfo {
os: "test".into(),
arch: "test".into(),
cpu_cores: 8,
total_ram_mb: ram_mb,
gpu_backend: accel_backend,
gpu_memory_mb: gpu_mb,
gpu_devices: vec![],
recommended_model: String::new(),
recommended_context: 4096,
max_model_mb: 0,
}
}
// Metal host with `ram_gb` GB of RAM and no separate GPU memory figure.
fn mac(ram_gb: u64) -> HardwareInfo {
hw(GpuBackend::Metal, ram_gb * 1024, None)
}
// Local, non-deprecated, not-installed model with Generate + Code
// capabilities; `size_mb` is used for both the download size and the RAM figure.
fn local_model(id: &str, name: &str, params: &str, size_mb: u64) -> ModelSchema {
ModelSchema {
id: id.into(),
name: name.into(),
provider: "qwen".into(),
family: "qwen3".into(),
version: String::new(),
capabilities: vec![ModelCapability::Generate, ModelCapability::Code],
context_length: 32768,
param_count: params.into(),
quantization: Some("Q4_K_M".into()),
performance: PerformanceEnvelope::default(),
cost: CostModel {
size_mb: Some(size_mb),
ram_mb: Some(size_mb),
..Default::default()
},
source: ModelSource::Local {
hf_repo: "x/y".into(),
hf_filename: "m.gguf".into(),
tokenizer_repo: "x/y".into(),
},
tags: vec![],
supported_params: vec![],
public_benchmarks: vec![],
trust_tier: TrustTier::Curated,
deprecated: false,
available: false,
}
}
// Four local models of increasing size; the largest carries an
// "active parameters" annotation.
fn catalog() -> Vec<ModelSchema> {
vec![
local_model("qwen/qwen3-0.6b", "Qwen3-0.6B", "0.6B", 650),
local_model("qwen/qwen3-4b", "Qwen3-4B", "4B", 2500),
local_model("qwen/qwen3-8b", "Qwen3-8B", "8B", 4900),
local_model("qwen/qwen3-30b", "Qwen3-30B-A3B", "30B (3B active)", 17000),
]
}
// Borrows every catalog entry, matching the `&[&ModelSchema]` input of `recommend`.
fn refs(v: &[ModelSchema]) -> Vec<&ModelSchema> {
v.iter().collect()
}
// The Fastest tier should rank the smallest model first.
#[test]
fn fastest_prefers_the_small_model() {
let cat = catalog();
let recs = recommend(
&refs(&cat),
&mac(36),
UseCase::Coding,
QualityTier::Fastest,
Privacy::OnDevice,
).picks;
assert_eq!(recs[0].display_name, "Qwen3-0.6B");
}
// The MostCapable tier should rank the largest model first when it fits.
#[test]
fn most_capable_prefers_the_big_model_when_it_fits() {
let cat = catalog();
let recs = recommend(
&refs(&cat),
&mac(36), UseCase::Coding,
QualityTier::MostCapable,
Privacy::OnDevice,
).picks;
assert_eq!(recs[0].display_name, "Qwen3-30B-A3B");
assert_eq!(recs[0].fit, FitStatus::Fits);
}
// On an 8 GB host the largest model must be absent from `picks`, while
// something smaller is still offered.
#[test]
fn too_big_models_are_excluded_on_small_machines() {
let cat = catalog();
let recs = recommend(
&refs(&cat),
&mac(8), UseCase::Coding,
QualityTier::MostCapable,
Privacy::OnDevice,
).picks;
let names: Vec<&str> = recs.iter().map(|r| r.display_name.as_str()).collect();
assert!(!names.contains(&"Qwen3-30B-A3B"), "30B must not fit 8GB");
assert!(recs.iter().all(|r| r.fit == FitStatus::Fits));
assert!(!recs.is_empty(), "the 0.6B model should still be offered");
}
// The Balanced tier on a 16 GB host should lead with a mid-sized model.
#[test]
fn balanced_picks_a_capable_model_that_fits() {
let cat = catalog();
let recs = recommend(
&refs(&cat),
&mac(16),
UseCase::Coding,
QualityTier::Balanced,
Privacy::OnDevice,
).picks;
assert!(matches!(
recs[0].display_name.as_str(),
"Qwen3-4B" | "Qwen3-8B"
));
}
// The Search use case must only return the model with the Embed capability.
#[test]
fn search_only_returns_embedding_models() {
let mut cat = catalog();
let mut embed = local_model("qwen/embed", "Qwen3-Embedding", "0.6B", 640);
embed.capabilities = vec![ModelCapability::Embed];
cat.push(embed);
let recs = recommend(
&refs(&cat),
&mac(16),
UseCase::Search,
QualityTier::Balanced,
Privacy::OnDevice,
).picks;
assert_eq!(recs.len(), 1, "only the embed model is in the Search lane");
assert_eq!(recs[0].display_name, "Qwen3-Embedding");
assert_eq!(recs[0].role, UseCaseRole::Retrieval);
}
// A deprecated entry must not appear in `picks`.
#[test]
fn deprecated_models_are_never_recommended() {
let mut cat = catalog();
cat[1].deprecated = true; let recs = recommend(
&refs(&cat),
&mac(16),
UseCase::Coding,
QualityTier::Balanced,
Privacy::OnDevice,
).picks;
assert!(recs.iter().all(|r| r.display_name != "Qwen3-4B"));
}
// A remote-API model is hidden under OnDevice, and under CloudOk it appears
// flagged as needing consent with a ServerProvided fit.
#[test]
fn on_device_excludes_cloud_but_cloud_ok_includes_it_with_consent() {
let mut cat = catalog();
let mut cloud = local_model("anthropic/sonnet", "Claude Sonnet", "", 0);
cloud.capabilities = vec![ModelCapability::Generate, ModelCapability::Code];
cloud.source = ModelSource::RemoteApi {
endpoint: "https://api".into(),
api_key_env: "K".into(),
api_key_envs: vec![],
api_version: None,
protocol: crate::schema::ApiProtocol::Anthropic,
};
cloud.public_benchmarks = vec![crate::schema::BenchmarkScore {
name: "SWE-bench".into(),
score: 0.7,
harness: None,
source_url: None,
measured_at: None,
}];
cat.push(cloud);
let on_device = recommend(
&refs(&cat),
&mac(16),
UseCase::Coding,
QualityTier::MostCapable,
Privacy::OnDevice,
)
.picks;
assert!(on_device.iter().all(|r| r.is_local));
let cloud_ok = recommend(
&refs(&cat),
&mac(16),
UseCase::Coding,
QualityTier::MostCapable,
Privacy::CloudOk,
)
.picks;
let claude = cloud_ok
.iter()
.find(|r| r.display_name == "Claude Sonnet")
.expect("cloud model eligible under CloudOk");
assert!(claude.requires_cloud_consent);
assert_eq!(claude.fit, FitStatus::ServerProvided);
}
// An MLX-sourced model must not be offered on a CPU-only host.
#[test]
fn metal_only_model_excluded_on_cpu_host() {
let mut cat = catalog();
let mut mlx = local_model("mlx/qwen3-4b", "Qwen3-4B-MLX", "4B", 2400);
mlx.source = ModelSource::Mlx {
hf_repo: "mlx-community/x".into(),
hf_weight_file: None,
};
cat.push(mlx);
let recs = recommend(
&refs(&cat),
&hw(GpuBackend::Cpu, 32 * 1024, None),
UseCase::Coding,
QualityTier::Balanced,
Privacy::OnDevice,
).picks;
assert!(recs.iter().all(|r| r.display_name != "Qwen3-4B-MLX"));
}
// Two identical calls must return `picks` in the same order.
#[test]
fn ranking_is_deterministic() {
let cat = catalog();
let a = recommend(
&refs(&cat),
&mac(16),
UseCase::Assistant,
QualityTier::Balanced,
Privacy::OnDevice,
);
let b = recommend(
&refs(&cat),
&mac(16),
UseCase::Assistant,
QualityTier::Balanced,
Privacy::OnDevice,
);
let ids_a: Vec<&str> = a.picks.iter().map(|r| r.model_id.as_str()).collect();
let ids_b: Vec<&str> = b.picks.iter().map(|r| r.model_id.as_str()).collect();
assert_eq!(ids_a, ids_b);
}
// The rationale must state the purpose and avoid quantization / file-format terms.
#[test]
fn rationale_is_plain_language_no_jargon() {
let cat = catalog();
let recs = recommend(
&refs(&cat),
&mac(36),
UseCase::Coding,
QualityTier::Balanced,
Privacy::OnDevice,
).picks;
let r = &recs[0].rationale;
assert!(!r.contains("Q4_K_M"), "no quantization jargon");
assert!(!r.contains("gguf") && !r.contains("hf_repo"));
assert!(r.contains("coding"), "states the purpose");
}
// When nothing fits (2 GB CPU host), the models land in `not_enough_memory`
// and a note explains the situation.
#[test]
fn all_too_big_surfaces_needs_more_ram_with_a_note() {
let cat = catalog();
let set = recommend(
&refs(&cat),
&hw(GpuBackend::Cpu, 2 * 1024, None),
UseCase::Coding,
QualityTier::Balanced,
Privacy::OnDevice,
);
assert!(set.picks.is_empty(), "nothing should fit 2 GB");
assert!(
!set.not_enough_memory.is_empty(),
"too-big models surfaced, not dropped"
);
let note = set.note.expect("empty picks must carry a note");
assert!(note.contains("fits"), "note explains the no-fit: {note}");
assert_eq!(set.not_enough_memory[0].fit, FitStatus::TooBig);
}
// When every model is filtered out for being deprecated, the note must be
// the generic one, not the memory-related one.
#[test]
fn all_deprecated_gives_generic_note_not_a_memory_note() {
let mut cat = catalog();
for m in &mut cat {
m.deprecated = true;
}
let set = recommend(
&refs(&cat),
&mac(36), UseCase::Coding,
QualityTier::Balanced,
Privacy::OnDevice,
);
assert!(set.picks.is_empty());
assert!(set.not_enough_memory.is_empty());
let note = set.note.expect("must explain");
assert!(
!note.contains("fits") && !note.contains("memory"),
"deprecated-only must not claim a memory problem: {note}"
);
}
// The `not_enough_memory` list must also come back in a stable order.
#[test]
fn not_enough_memory_is_ordered_deterministically() {
let cat = catalog();
let mk = || {
recommend(
&refs(&cat),
&hw(GpuBackend::Cpu, 3 * 1024, None), UseCase::Coding,
QualityTier::Balanced,
Privacy::OnDevice,
)
.not_enough_memory
.into_iter()
.map(|r| r.model_id)
.collect::<Vec<_>>()
};
assert!(mk().len() >= 2, "several models should be too big for 3 GB");
assert_eq!(mk(), mk(), "too-big ordering must be deterministic");
}
// An empty catalog yields two empty lists and still carries a note.
#[test]
fn empty_registry_returns_empty_with_a_note() {
let set = recommend(
&[],
&mac(16),
UseCase::Assistant,
QualityTier::Balanced,
Privacy::OnDevice,
);
assert!(set.picks.is_empty());
assert!(set.not_enough_memory.is_empty());
assert!(set.note.is_some(), "no-model case must explain itself");
}
// On a CUDA host with 24 GB of GPU memory the largest model is the top
// MostCapable pick.
#[test]
fn cuda_box_sizes_against_vram() {
let cat = catalog();
let h = hw(GpuBackend::Cuda, 64 * 1024, Some(24 * 1024));
let recs = recommend(
&refs(&cat),
&h,
UseCase::Coding,
QualityTier::MostCapable,
Privacy::OnDevice,
)
.picks;
assert_eq!(recs[0].display_name, "Qwen3-30B-A3B");
}
// A discrete GPU behind a CPU backend is reported as unsupported, so the
// budget comes from the 16 GB of system RAM rather than the GPU's memory.
#[test]
fn unsupported_discrete_gpu_uses_system_ram_not_vram() {
let cat = catalog();
let mut h = hw(GpuBackend::Cpu, 16 * 1024, None);
h.gpu_devices = vec![GpuDevice {
vendor: GpuVendor::Nvidia,
name: "GeForce RTX 4090".into(),
memory_mb: Some(24_000),
}];
assert!(matches!(
h.supported_acceleration(),
crate::hardware::SupportedAcceleration::UnsupportedDiscreteGpu { .. }
));
let recs = recommend(
&refs(&cat),
&h,
UseCase::Coding,
QualityTier::MostCapable,
Privacy::OnDevice,
)
.picks;
assert!(
recs.iter().all(|r| r.display_name != "Qwen3-30B-A3B"),
"17 GB model must not fit a 16 GB-RAM CPU host"
);
assert!(!recs.is_empty(), "smaller models still fit");
}
// The serialized JSON must expose the expected snake_case keys.
#[test]
fn recommendation_set_wire_shape_is_snake_case_and_stable() {
let cat = catalog();
let set = recommend(
&refs(&cat),
&mac(36),
UseCase::Coding,
QualityTier::Balanced,
Privacy::OnDevice,
);
let json = serde_json::to_string(&set).unwrap();
assert!(json.contains("\"picks\""));
assert!(json.contains("\"not_enough_memory\""));
assert!(json.contains("\"model_id\""));
assert!(json.contains("\"already_installed\""));
assert!(json.contains("\"requires_cloud_consent\""));
assert!(json.contains("\"fit\""));
}
// With an empty `param_count`, the parameter estimate must come from the
// download size instead of collapsing to zero.
#[test]
fn blank_param_count_estimates_from_size_not_zero() {
let mut m = local_model("x/unknown", "Unknown-Model", "", 4900);
m.param_count = String::new();
assert!(
param_billions_total(&m) > 5.0,
"4.9 GB ⇒ roughly an 8B model, not 0B"
);
}
}