1use serde::{Deserialize, Serialize};
20
21use crate::hardware::{HardwareInfo, SupportedAcceleration};
22use crate::intent::{Privacy, QualityTier, UseCase, UseCaseRole};
23use crate::schema::{ModelSchema, TrustTier};
24
25#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
27#[serde(rename_all = "snake_case")]
28pub enum FitStatus {
29 Fits,
31 TooBig,
33 ServerProvided,
36 Unknown,
38}
39
40#[derive(Debug, Clone, Serialize, Deserialize)]
42pub struct Recommendation {
43 pub model_id: String,
45 pub display_name: String,
47 pub role: UseCaseRole,
49 pub rationale: String,
52 pub download_mb: u64,
54 pub already_installed: bool,
56 pub fit: FitStatus,
58 pub acceleration: SupportedAcceleration,
60 pub is_local: bool,
62 pub requires_cloud_consent: bool,
65 pub trust_tier: TrustTier,
67 pub score: f32,
69}
70
/// Memory (MB) subtracted from unified/system RAM when computing the budget
/// on Apple and CPU hosts.
const OS_RESERVE_MB: u64 = 3 * 1024;
/// Backend overhead (MB) added to a model's requirement on Apple hosts.
const OVERHEAD_METAL_MB: u64 = 512;
/// Backend overhead (MB) added to a model's requirement on CUDA hosts.
const OVERHEAD_CUDA_MB: u64 = 512;
/// Backend overhead (MB) added to a model's requirement on every other host.
const OVERHEAD_CPU_MB: u64 = 1024;
/// Extra headroom (MB) a model must leave free to count as fitting.
const SAFETY_MARGIN_MB: u64 = 1024;
/// Context length (tokens) assumed when sizing the KV cache for the fit check.
const FIT_CONTEXT_TOKENS: usize = 8 * 1024;
85
86#[derive(Debug, Clone, Serialize, Deserialize)]
89pub struct RecommendationSet {
90 pub picks: Vec<Recommendation>,
93 pub not_enough_memory: Vec<Recommendation>,
96 pub note: Option<String>,
99}
100
101pub fn recommend(
108 models: &[&ModelSchema],
109 hw: &HardwareInfo,
110 use_case: UseCase,
111 tier: QualityTier,
112 privacy: Privacy,
113) -> RecommendationSet {
114 let accel = hw.supported_acceleration();
115 let sort = |v: &mut Vec<Recommendation>| {
116 v.sort_by(|a, b| {
118 b.score
119 .partial_cmp(&a.score)
120 .unwrap_or(std::cmp::Ordering::Equal)
121 .then(b.already_installed.cmp(&a.already_installed))
122 .then(a.download_mb.cmp(&b.download_mb))
123 .then(a.model_id.cmp(&b.model_id))
124 });
125 };
126
127 let (mut picks, mut not_enough_memory): (Vec<_>, Vec<_>) = models
128 .iter()
129 .filter(|m| passes_base_filter(m, hw, use_case, privacy))
130 .map(|m| build_recommendation(m, hw, &accel, use_case, tier))
131 .partition(|r| r.fit != FitStatus::TooBig);
132 sort(&mut picks);
133 sort(&mut not_enough_memory);
134
135 let note = explain_if_needed(&picks, ¬_enough_memory, hw, use_case, tier, privacy);
136 RecommendationSet {
137 picks,
138 not_enough_memory,
139 note,
140 }
141}
142
143fn passes_base_filter(
147 m: &ModelSchema,
148 hw: &HardwareInfo,
149 use_case: UseCase,
150 privacy: Privacy,
151) -> bool {
152 if m.deprecated {
153 return false;
154 }
155 if !use_case
157 .required_capabilities()
158 .iter()
159 .all(|c| m.has_capability(*c))
160 {
161 return false;
162 }
163 if privacy == Privacy::OnDevice && !m.is_local() {
165 return false;
166 }
167 if m.requires_apple_silicon()
169 && !matches!(hw.supported_acceleration(), SupportedAcceleration::Apple { .. })
170 {
171 return false;
172 }
173 true
174}
175
176fn explain_if_needed(
178 picks: &[Recommendation],
179 too_big: &[Recommendation],
180 hw: &HardwareInfo,
181 use_case: UseCase,
182 tier: QualityTier,
183 privacy: Privacy,
184) -> Option<String> {
185 let purpose = use_case_purpose(use_case);
186 if picks.is_empty() {
187 let ram_gb = hw.total_ram_mb / 1024;
188 return Some(if !too_big.is_empty() {
189 match privacy {
190 Privacy::OnDevice => format!(
191 "No on-device model for {purpose} fits your {ram_gb} GB machine. \
192 Free up memory, pick a smaller tier, or allow cloud models."
193 ),
194 Privacy::CloudOk => format!(
195 "No local model for {purpose} fits your {ram_gb} GB machine, and no \
196 cloud model is configured. Add an API key or free up memory."
197 ),
198 }
199 } else {
200 format!("No model available for {purpose} on this machine.")
201 });
202 }
203 if picks[0].requires_cloud_consent {
206 return Some(format!(
207 "The best {purpose} pick runs in the cloud and needs your OK before first use. \
208 {} fits locally if you prefer on-device.",
209 picks
210 .iter()
211 .find(|p| p.is_local)
212 .map(|p| p.display_name.as_str())
213 .unwrap_or("No local model")
214 ));
215 }
216 let _ = tier;
217 None
218}
219
220fn use_case_purpose(use_case: UseCase) -> &'static str {
221 match use_case {
222 UseCase::Assistant => "chat & general help",
223 UseCase::Coding => "coding",
224 UseCase::Summarize => "summarizing",
225 UseCase::Vision => "understanding images",
226 UseCase::Transcription => "transcription",
227 UseCase::Search => "semantic search",
228 }
229}
230
231fn build_recommendation(
232 m: &ModelSchema,
233 hw: &HardwareInfo,
234 accel: &SupportedAcceleration,
235 use_case: UseCase,
236 tier: QualityTier,
237) -> Recommendation {
238 let fit = fit_status(m, hw);
239 let quality = quality_score(m);
240 let latency = latency_score(m, accel);
241 let pressure = memory_pressure(m, hw);
242 let w = tier.weights();
243 let mut score =
245 w.quality * quality + w.latency * latency + w.memory_pressure * (1.0 - pressure);
246 let pref_hits = use_case
248 .preferred_capabilities()
249 .iter()
250 .filter(|c| m.has_capability(**c))
251 .count();
252 score += 0.05 * pref_hits as f32;
253
254 let is_local = m.is_local();
255 Recommendation {
256 model_id: m.id.clone(),
257 display_name: m.name.clone(),
258 role: use_case.role(),
259 rationale: rationale(m, hw, use_case, tier, fit, quality),
260 download_mb: m.size_mb(),
261 already_installed: m.available,
262 fit,
263 acceleration: accel.clone(),
264 is_local,
265 requires_cloud_consent: !is_local,
266 trust_tier: m.trust_tier,
267 score,
268 }
269}
270
271fn quality_score(m: &ModelSchema) -> f32 {
275 if !m.public_benchmarks.is_empty() {
276 let sum: f64 = m.public_benchmarks.iter().map(|b| b.score).sum();
277 return (sum / m.public_benchmarks.len() as f64).clamp(0.0, 1.0) as f32;
278 }
279 let b = param_billions_total(m).max(0.1);
282 (b / (b + 7.0)).clamp(0.0, 1.0) as f32
283}
284
285fn latency_score(m: &ModelSchema, accel: &SupportedAcceleration) -> f32 {
288 let b = param_billions_active(m).max(0.1);
289 let size_term = (8.0 / (b + 8.0)) as f32;
291 let accel_bonus = match accel {
292 SupportedAcceleration::Apple { .. } | SupportedAcceleration::Cuda { .. } => 0.1,
293 _ => 0.0,
294 };
295 (size_term + accel_bonus).clamp(0.0, 1.0)
296}
297
298fn memory_pressure(m: &ModelSchema, hw: &HardwareInfo) -> f32 {
301 let budget = memory_budget_mb(hw);
302 if budget == 0 {
303 return 1.0;
304 }
305 (memory_required_mb(m, hw) as f32 / budget as f32).clamp(0.0, 1.5)
306}
307
308fn fit_status(m: &ModelSchema, hw: &HardwareInfo) -> FitStatus {
310 if m.is_vllm_mlx() || m.is_remote() || m.is_delegated() {
312 return FitStatus::ServerProvided;
313 }
314 if m.is_foundation_models() {
316 return FitStatus::Fits;
317 }
318 if m.size_mb() == 0 && m.ram_mb() == 0 {
319 return FitStatus::Unknown;
320 }
321 if memory_required_mb(m, hw) + SAFETY_MARGIN_MB <= memory_budget_mb(hw) {
322 FitStatus::Fits
323 } else {
324 FitStatus::TooBig
325 }
326}
327
328fn memory_required_mb(m: &ModelSchema, hw: &HardwareInfo) -> u64 {
331 let weights = m.ram_mb().max(m.size_mb());
332 let kv = kv_cache_mb(m, FIT_CONTEXT_TOKENS);
333 weights + kv + backend_overhead_mb(hw)
334}
335
336fn kv_cache_mb(m: &ModelSchema, context_tokens: usize) -> u64 {
339 let per_1k = (param_billions_active(m) as f64 * 0.12).max(0.05);
341 ((context_tokens as f64 / 1000.0) * per_1k).ceil() as u64
342}
343
344fn memory_budget_mb(hw: &HardwareInfo) -> u64 {
346 match hw.supported_acceleration() {
347 SupportedAcceleration::Apple { unified_memory_mb } => {
348 unified_memory_mb.saturating_sub(OS_RESERVE_MB)
349 }
350 SupportedAcceleration::Cuda { device_memory_mb } => {
351 device_memory_mb.unwrap_or(hw.total_ram_mb)
352 }
353 _ => hw.total_ram_mb.saturating_sub(OS_RESERVE_MB),
355 }
356}
357
358fn backend_overhead_mb(hw: &HardwareInfo) -> u64 {
359 match hw.supported_acceleration() {
360 SupportedAcceleration::Apple { .. } => OVERHEAD_METAL_MB,
361 SupportedAcceleration::Cuda { .. } => OVERHEAD_CUDA_MB,
362 _ => OVERHEAD_CPU_MB,
363 }
364}
365
366fn param_billions_total(m: &ModelSchema) -> f32 {
371 if let Some(b) = parse_leading_billions(&m.param_count) {
372 return b;
373 }
374 let size = m.size_mb();
375 if size > 0 {
376 (size as f32 / 600.0).max(0.1)
377 } else {
378 0.0
379 }
380}
381
382fn param_billions_active(m: &ModelSchema) -> f32 {
385 if let Some(active) = m
386 .param_count
387 .split_once('(')
388 .and_then(|(_, rest)| rest.split_once("active"))
389 .and_then(|(num, _)| parse_leading_billions(num))
390 {
391 return active;
392 }
393 param_billions_total(m)
394}
395
/// Parse the number at the start of `s` as a parameter count in billions.
///
/// Leading/trailing whitespace is ignored. The number is the longest prefix
/// of ASCII digits and dots; if what follows it (after optional whitespace)
/// starts with `m` or `M`, the value is treated as millions and divided by
/// 1000. Returns `None` when the prefix is not a valid number.
fn parse_leading_billions(s: &str) -> Option<f32> {
    let trimmed = s.trim();
    let number_end = trimmed
        .find(|c: char| !(c.is_ascii_digit() || c == '.'))
        .unwrap_or(trimmed.len());
    let (digits, suffix) = trimmed.split_at(number_end);
    let value = digits.parse::<f32>().ok()?;

    let is_millions = suffix
        .trim_start()
        .starts_with(|c: char| c.eq_ignore_ascii_case(&'m'));
    Some(if is_millions { value / 1000.0 } else { value })
}
410
411fn rationale(
414 m: &ModelSchema,
415 hw: &HardwareInfo,
416 use_case: UseCase,
417 tier: QualityTier,
418 fit: FitStatus,
419 quality: f32,
420) -> String {
421 let purpose = use_case_purpose(use_case);
422 let machine = match hw.supported_acceleration() {
423 SupportedAcceleration::Apple { unified_memory_mb } => {
424 format!("your {} GB Apple Silicon Mac (Metal)", unified_memory_mb / 1024)
425 }
426 SupportedAcceleration::Cuda { device_memory_mb } => match device_memory_mb {
427 Some(mb) => format!("your {} GB NVIDIA GPU (CUDA)", mb / 1024),
428 None => "your NVIDIA GPU (CUDA)".to_string(),
429 },
430 SupportedAcceleration::UnsupportedDiscreteGpu { .. } | SupportedAcceleration::Cpu => {
431 format!("your {} GB machine (CPU)", hw.total_ram_mb / 1024)
432 }
433 };
434
435 match fit {
436 FitStatus::ServerProvided if m.is_remote() => format!(
437 "{}: cloud model for {} — runs on Parslee's servers, nothing to download",
438 m.name, purpose
439 ),
440 FitStatus::ServerProvided => format!(
441 "{}: served externally for {} — no local memory needed",
442 m.name, purpose
443 ),
444 _ => {
445 let tier_word = match tier {
446 QualityTier::Fastest => "fastest",
447 QualityTier::Balanced => "best-balanced",
448 QualityTier::MostCapable => "most capable",
449 };
450 let quality_note = if quality >= 0.7 {
451 "high-quality "
452 } else {
453 ""
454 };
455 let size = if m.size_mb() >= 1024 {
456 format!("{:.1} GB download", m.size_mb() as f64 / 1024.0)
457 } else {
458 format!("{} MB download", m.size_mb())
459 };
460 format!(
461 "{}: the {} {}{} model that fits {} ({})",
462 m.name, tier_word, quality_note, purpose, machine, size
463 )
464 }
465 }
466}
467
#[cfg(test)]
mod tests {
    use super::*;
    use crate::hardware::{GpuBackend, GpuDevice, GpuVendor};
    use crate::schema::{CostModel, ModelCapability, ModelSource, PerformanceEnvelope};

    /// Host fixture with the given GPU backend, system RAM, and GPU memory.
    fn hw(accel_backend: GpuBackend, ram_mb: u64, gpu_mb: Option<u64>) -> HardwareInfo {
        HardwareInfo {
            os: "test".into(),
            arch: "test".into(),
            cpu_cores: 8,
            total_ram_mb: ram_mb,
            gpu_backend: accel_backend,
            gpu_memory_mb: gpu_mb,
            gpu_devices: vec![],
            recommended_model: String::new(),
            recommended_context: 4096,
            max_model_mb: 0,
        }
    }

    /// Metal host fixture with `ram_gb` gigabytes of RAM.
    fn mac(ram_gb: u64) -> HardwareInfo {
        hw(GpuBackend::Metal, ram_gb * 1024, None)
    }

    /// Local model fixture with Generate + Code capabilities; `size_mb` is
    /// used for both the download size and the RAM figure.
    fn local_model(id: &str, name: &str, params: &str, size_mb: u64) -> ModelSchema {
        ModelSchema {
            id: id.into(),
            name: name.into(),
            provider: "qwen".into(),
            family: "qwen3".into(),
            version: String::new(),
            capabilities: vec![ModelCapability::Generate, ModelCapability::Code],
            context_length: 32768,
            param_count: params.into(),
            quantization: Some("Q4_K_M".into()),
            performance: PerformanceEnvelope::default(),
            cost: CostModel {
                size_mb: Some(size_mb),
                ram_mb: Some(size_mb),
                ..Default::default()
            },
            source: ModelSource::Local {
                hf_repo: "x/y".into(),
                hf_filename: "m.gguf".into(),
                tokenizer_repo: "x/y".into(),
            },
            tags: vec![],
            supported_params: vec![],
            public_benchmarks: vec![],
            trust_tier: TrustTier::Curated,
            deprecated: false,
            available: false,
        }
    }

    /// Four local models spanning 0.6B to 30B parameters.
    fn catalog() -> Vec<ModelSchema> {
        vec![
            local_model("qwen/qwen3-0.6b", "Qwen3-0.6B", "0.6B", 650),
            local_model("qwen/qwen3-4b", "Qwen3-4B", "4B", 2500),
            local_model("qwen/qwen3-8b", "Qwen3-8B", "8B", 4900),
            local_model("qwen/qwen3-30b", "Qwen3-30B-A3B", "30B (3B active)", 17000),
        ]
    }

    /// Borrow every model in `v`, matching the shape `recommend` expects.
    fn refs(v: &[ModelSchema]) -> Vec<&ModelSchema> {
        v.iter().collect()
    }

    /// Run `recommend` with `Privacy::OnDevice`, the setting most tests use.
    fn on_device(
        cat: &[ModelSchema],
        host: &HardwareInfo,
        use_case: UseCase,
        tier: QualityTier,
    ) -> RecommendationSet {
        recommend(&refs(cat), host, use_case, tier, Privacy::OnDevice)
    }

    #[test]
    fn fastest_prefers_the_small_model() {
        let cat = catalog();
        let recs = on_device(&cat, &mac(36), UseCase::Coding, QualityTier::Fastest).picks;
        assert_eq!(recs[0].display_name, "Qwen3-0.6B");
    }

    #[test]
    fn most_capable_prefers_the_big_model_when_it_fits() {
        let cat = catalog();
        let recs = on_device(&cat, &mac(36), UseCase::Coding, QualityTier::MostCapable).picks;
        assert_eq!(recs[0].display_name, "Qwen3-30B-A3B");
        assert_eq!(recs[0].fit, FitStatus::Fits);
    }

    #[test]
    fn too_big_models_are_excluded_on_small_machines() {
        let cat = catalog();
        let recs = on_device(&cat, &mac(8), UseCase::Coding, QualityTier::MostCapable).picks;
        let names: Vec<&str> = recs.iter().map(|r| r.display_name.as_str()).collect();
        assert!(!names.contains(&"Qwen3-30B-A3B"), "30B must not fit 8GB");
        assert!(recs.iter().all(|r| r.fit == FitStatus::Fits));
        assert!(!recs.is_empty(), "the 0.6B model should still be offered");
    }

    #[test]
    fn balanced_picks_a_capable_model_that_fits() {
        let cat = catalog();
        let recs = on_device(&cat, &mac(16), UseCase::Coding, QualityTier::Balanced).picks;
        assert!(matches!(
            recs[0].display_name.as_str(),
            "Qwen3-4B" | "Qwen3-8B"
        ));
    }

    #[test]
    fn search_only_returns_embedding_models() {
        let mut cat = catalog();
        let mut embed = local_model("qwen/embed", "Qwen3-Embedding", "0.6B", 640);
        embed.capabilities = vec![ModelCapability::Embed];
        cat.push(embed);

        let recs = on_device(&cat, &mac(16), UseCase::Search, QualityTier::Balanced).picks;
        assert_eq!(recs.len(), 1, "only the embed model is in the Search lane");
        assert_eq!(recs[0].display_name, "Qwen3-Embedding");
        assert_eq!(recs[0].role, UseCaseRole::Retrieval);
    }

    #[test]
    fn deprecated_models_are_never_recommended() {
        let mut cat = catalog();
        cat[1].deprecated = true;

        let recs = on_device(&cat, &mac(16), UseCase::Coding, QualityTier::Balanced).picks;
        assert!(recs.iter().all(|r| r.display_name != "Qwen3-4B"));
    }

    #[test]
    fn on_device_excludes_cloud_but_cloud_ok_includes_it_with_consent() {
        let mut cat = catalog();
        let mut cloud = local_model("anthropic/sonnet", "Claude Sonnet", "", 0);
        cloud.capabilities = vec![ModelCapability::Generate, ModelCapability::Code];
        cloud.source = ModelSource::RemoteApi {
            endpoint: "https://api".into(),
            api_key_env: "K".into(),
            api_key_envs: vec![],
            api_version: None,
            protocol: crate::schema::ApiProtocol::Anthropic,
        };
        cloud.public_benchmarks = vec![crate::schema::BenchmarkScore {
            name: "SWE-bench".into(),
            score: 0.7,
            harness: None,
            source_url: None,
            measured_at: None,
        }];
        cat.push(cloud);

        let local_only = on_device(&cat, &mac(16), UseCase::Coding, QualityTier::MostCapable).picks;
        assert!(local_only.iter().all(|r| r.is_local));

        let cloud_ok = recommend(
            &refs(&cat),
            &mac(16),
            UseCase::Coding,
            QualityTier::MostCapable,
            Privacy::CloudOk,
        )
        .picks;
        let claude = cloud_ok
            .iter()
            .find(|r| r.display_name == "Claude Sonnet")
            .expect("cloud model eligible under CloudOk");
        assert!(claude.requires_cloud_consent);
        assert_eq!(claude.fit, FitStatus::ServerProvided);
    }

    #[test]
    fn metal_only_model_excluded_on_cpu_host() {
        let mut cat = catalog();
        let mut mlx = local_model("mlx/qwen3-4b", "Qwen3-4B-MLX", "4B", 2400);
        mlx.source = ModelSource::Mlx {
            hf_repo: "mlx-community/x".into(),
            hf_weight_file: None,
        };
        cat.push(mlx);

        let cpu_host = hw(GpuBackend::Cpu, 32 * 1024, None);
        let recs = on_device(&cat, &cpu_host, UseCase::Coding, QualityTier::Balanced).picks;
        assert!(recs.iter().all(|r| r.display_name != "Qwen3-4B-MLX"));
    }

    #[test]
    fn ranking_is_deterministic() {
        let cat = catalog();
        let a = on_device(&cat, &mac(16), UseCase::Assistant, QualityTier::Balanced);
        let b = on_device(&cat, &mac(16), UseCase::Assistant, QualityTier::Balanced);
        let ids_a: Vec<&str> = a.picks.iter().map(|r| r.model_id.as_str()).collect();
        let ids_b: Vec<&str> = b.picks.iter().map(|r| r.model_id.as_str()).collect();
        assert_eq!(ids_a, ids_b);
    }

    #[test]
    fn rationale_is_plain_language_no_jargon() {
        let cat = catalog();
        let recs = on_device(&cat, &mac(36), UseCase::Coding, QualityTier::Balanced).picks;
        let r = &recs[0].rationale;
        assert!(!r.contains("Q4_K_M"), "no quantization jargon");
        assert!(!r.contains("gguf") && !r.contains("hf_repo"));
        assert!(r.contains("coding"), "states the purpose");
    }

    #[test]
    fn all_too_big_surfaces_needs_more_ram_with_a_note() {
        let cat = catalog();
        let tiny_host = hw(GpuBackend::Cpu, 2 * 1024, None);
        let set = on_device(&cat, &tiny_host, UseCase::Coding, QualityTier::Balanced);
        assert!(set.picks.is_empty(), "nothing should fit 2 GB");
        assert!(
            !set.not_enough_memory.is_empty(),
            "too-big models surfaced, not dropped"
        );
        let note = set.note.expect("empty picks must carry a note");
        assert!(note.contains("fits"), "note explains the no-fit: {note}");
        assert_eq!(set.not_enough_memory[0].fit, FitStatus::TooBig);
    }

    #[test]
    fn all_deprecated_gives_generic_note_not_a_memory_note() {
        let mut cat = catalog();
        for m in &mut cat {
            m.deprecated = true;
        }
        let set = on_device(&cat, &mac(36), UseCase::Coding, QualityTier::Balanced);
        assert!(set.picks.is_empty());
        assert!(set.not_enough_memory.is_empty());
        let note = set.note.expect("must explain");
        assert!(
            !note.contains("fits") && !note.contains("memory"),
            "deprecated-only must not claim a memory problem: {note}"
        );
    }

    #[test]
    fn not_enough_memory_is_ordered_deterministically() {
        let cat = catalog();
        let mk = || {
            on_device(
                &cat,
                &hw(GpuBackend::Cpu, 3 * 1024, None),
                UseCase::Coding,
                QualityTier::Balanced,
            )
            .not_enough_memory
            .into_iter()
            .map(|r| r.model_id)
            .collect::<Vec<_>>()
        };
        assert!(mk().len() >= 2, "several models should be too big for 3 GB");
        assert_eq!(mk(), mk(), "too-big ordering must be deterministic");
    }

    #[test]
    fn empty_registry_returns_empty_with_a_note() {
        let set = on_device(&[], &mac(16), UseCase::Assistant, QualityTier::Balanced);
        assert!(set.picks.is_empty());
        assert!(set.not_enough_memory.is_empty());
        assert!(set.note.is_some(), "no-model case must explain itself");
    }

    #[test]
    fn cuda_box_sizes_against_vram() {
        let cat = catalog();
        let h = hw(GpuBackend::Cuda, 64 * 1024, Some(24 * 1024));
        let recs = on_device(&cat, &h, UseCase::Coding, QualityTier::MostCapable).picks;
        assert_eq!(recs[0].display_name, "Qwen3-30B-A3B");
    }

    #[test]
    fn unsupported_discrete_gpu_uses_system_ram_not_vram() {
        let cat = catalog();
        let mut h = hw(GpuBackend::Cpu, 16 * 1024, None);
        h.gpu_devices = vec![GpuDevice {
            vendor: GpuVendor::Nvidia,
            name: "GeForce RTX 4090".into(),
            memory_mb: Some(24_000),
        }];
        assert!(matches!(
            h.supported_acceleration(),
            SupportedAcceleration::UnsupportedDiscreteGpu { .. }
        ));

        let recs = on_device(&cat, &h, UseCase::Coding, QualityTier::MostCapable).picks;
        assert!(
            recs.iter().all(|r| r.display_name != "Qwen3-30B-A3B"),
            "17 GB model must not fit a 16 GB-RAM CPU host"
        );
        assert!(!recs.is_empty(), "smaller models still fit");
    }

    #[test]
    fn recommendation_set_wire_shape_is_snake_case_and_stable() {
        let cat = catalog();
        let set = on_device(&cat, &mac(36), UseCase::Coding, QualityTier::Balanced);
        let json = serde_json::to_string(&set).unwrap();
        for key in [
            "\"picks\"",
            "\"not_enough_memory\"",
            "\"model_id\"",
            "\"already_installed\"",
            "\"requires_cloud_consent\"",
            "\"fit\"",
        ] {
            assert!(json.contains(key));
        }
    }

    #[test]
    fn blank_param_count_estimates_from_size_not_zero() {
        let mut m = local_model("x/unknown", "Unknown-Model", "", 4900);
        m.param_count = String::new();
        assert!(
            param_billions_total(&m) > 5.0,
            "4.9 GB ⇒ roughly an 8B model, not 0B"
        );
    }
}