// car-inference 0.13.0

//! Hardware detection — auto-configure models and context based on system capabilities.

use serde::{Deserialize, Serialize};

/// Snapshot of the host's hardware together with the model and context
/// recommendations derived from it.
///
/// Produced by [`HardwareInfo::detect`]. The struct is serde-serializable
/// so it can be handed to other layers unchanged.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HardwareInfo {
    /// Operating system name as returned by `detect_os` (`"macos"`,
    /// `"linux"`, `"windows"`, or `std::env::consts::OS` otherwise).
    pub os: String,
    /// CPU architecture, taken from `std::env::consts::ARCH`.
    pub arch: String,
    /// Parallelism reported by `std::thread::available_parallelism`;
    /// `1` when that query fails.
    pub cpu_cores: usize,
    /// Total system RAM in megabytes. `detect` falls back to 8192 when
    /// the platform-specific query is unavailable or fails.
    pub total_ram_mb: u64,
    /// Inference backend the build can actually drive — Metal on
    /// Apple Silicon (with `metal` feature), CUDA on NVIDIA (with
    /// `cuda` feature), CPU otherwise. Distinct from `gpu_devices`,
    /// which lists every GPU the OS sees regardless of whether CAR
    /// has a backend for it.
    pub gpu_backend: GpuBackend,
    /// Memory in MB attributed to the active backend: total RAM minus
    /// 4096 for Metal, the first device's total from `nvidia-smi` for
    /// CUDA (`None` when it cannot be queried), and `None` for CPU.
    pub gpu_memory_mb: Option<u64>,
    /// Every GPU the OS reports — vendor + name + memory if known.
    /// Populated even when `gpu_backend == Cpu` so downstream
    /// consumers (Tokhn's concierge etc.) can route based on the
    /// hardware that's actually present, not just what CAR's
    /// inference backends currently target.
    ///
    /// `#[serde(default)]` lets payloads without this field
    /// deserialize to an empty list.
    #[serde(default)]
    pub gpu_devices: Vec<GpuDevice>,
    /// Recommended model based on available resources.
    pub recommended_model: String,
    /// Recommended max context length in tokens.
    pub recommended_context: usize,
    /// Maximum model size in MB that fits in memory (with headroom for KV cache).
    /// `detect` computes it as 60% of the available memory minus 700 MB
    /// reserved for the embedding model family.
    pub max_model_mb: u64,
}

/// Inference backend selected for this build.
///
/// Serialized in `snake_case` (`"metal"`, `"cuda"`, `"cpu"`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum GpuBackend {
    /// Reported when the `metal` feature is compiled in.
    Metal,
    /// Reported when the `cuda` feature is compiled in (and `metal` is not).
    Cuda,
    /// Reported when neither GPU feature is compiled in.
    Cpu,
}

/// Acceleration tier this build can actually drive — derived from
/// `gpu_backend` plus the OS-reported `gpu_devices` list.
///
/// The enum is intentionally narrower than the underlying detection
/// data: routing layers and Tokhn's concierge want a one-look answer
/// to "what tier is this user on" without each re-applying the
/// gpu_backend × gpu_devices × feature-flag matrix themselves.
///
/// `UnsupportedDiscreteGpu` is the case #93 exists to surface — a
/// discrete NVIDIA / AMD / Intel GPU is present but CAR's compiled
/// inference backends can't drive it yet (cuda / metal feature not
/// compiled in, or no DirectML / Vulkan / ROCm backend exists for
/// this build). Routing logic should tier these systems above
/// `Cpu`-only systems even though the actual inference path is the
/// same — they have hardware that *could* be driven once a backend
/// lands, and the routing should bias toward keeping the door open
/// (e.g. recommending GGUF + CPU-bias rather than the smallest
/// possible model).
///
/// Serialized as an internally tagged enum: the variant name goes in a
/// `"tier"` field in `snake_case`, alongside the variant's own fields.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "tier", rename_all = "snake_case")]
pub enum SupportedAcceleration {
    /// Apple Silicon with the `metal` feature compiled. Active path
    /// is the MLX backend.
    Apple {
        /// Total system memory in MB; Apple Silicon's unified memory
        /// is shared between CPU and GPU.
        unified_memory_mb: u64,
    },
    /// NVIDIA with the `cuda` feature compiled. Active path is
    /// Candle + CUDA. `device_memory_mb` comes from `nvidia-smi`
    /// when available, and is `None` when that query fails.
    Cuda { device_memory_mb: Option<u64> },
    /// Discrete GPU detected but no compiled backend can drive it.
    /// CAR falls back to CPU inference. Future DirectML / Vulkan /
    /// ROCm backends will move qualifying systems out of this tier.
    UnsupportedDiscreteGpu {
        /// Vendor of the first discrete device found in `gpu_devices`.
        vendor: GpuVendor,
        /// Device name exactly as reported in `gpu_devices`.
        name: String,
        /// Device memory in MB when the OS reported it.
        memory_mb: Option<u64>,
    },
    /// CPU-only — either no GPU at all, or only an integrated GPU
    /// without a usable inference path.
    Cpu,
}

/// One discrete or integrated GPU as reported by the OS.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct GpuDevice {
    /// Vendor identity, derived from a PCI vendor ID or from the
    /// device name depending on the platform probe.
    pub vendor: GpuVendor,
    /// Device name as reported by the platform probe.
    pub name: String,
    /// VRAM (or unified memory) in megabytes when the OS reports it.
    /// Linux sysfs doesn't expose this universally; Windows WMI does.
    /// Omitted from serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub memory_mb: Option<u64>,
}

/// GPU vendor identity. `Other(String)` carries the raw vendor
/// string for hardware we don't yet enumerate (Moore Threads, etc.).
///
/// Variant names are serialized in `snake_case`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum GpuVendor {
    /// Apple GPUs (PCI vendor ID `106b`, or an Apple / M-series name).
    Apple,
    /// NVIDIA GPUs (PCI vendor ID `10de`).
    Nvidia,
    /// AMD GPUs (PCI vendor ID `1002`).
    Amd,
    /// Intel GPUs (PCI vendor ID `8086`).
    Intel,
    /// Unrecognised vendor; holds the raw PCI ID or device name.
    Other(String),
}

impl GpuVendor {
    /// Map a PCI vendor ID (Linux sysfs reports as `0x...`) to a
    /// vendor identity. The IDs are stable; AMD = 0x1002, NVIDIA =
    /// 0x10de, Intel = 0x8086, Apple = 0x106b.
    ///
    /// Surrounding whitespace is ignored and matching is
    /// case-insensitive, including the `0x` / `0X` prefix. Unknown
    /// IDs come back as `Other` holding the lowercase hex digits
    /// without the prefix.
    pub fn from_pci_id(raw: &str) -> Self {
        // Lowercase *before* stripping the prefix; otherwise an
        // upper-case `0X10DE` keeps its prefix and lands in `Other`.
        let lowered = raw.trim().to_ascii_lowercase();
        match lowered.trim_start_matches("0x") {
            "1002" => Self::Amd,
            "10de" => Self::Nvidia,
            "8086" => Self::Intel,
            "106b" => Self::Apple,
            other => Self::Other(other.to_string()),
        }
    }

    /// Map a free-form name (Windows WMI / macOS system_profiler) to
    /// a vendor identity. Falls back to `Other(name)` when unknown.
    ///
    /// Matching is case-insensitive substring search. Vendors are
    /// tried in the order NVIDIA, AMD, Intel, Apple, so a name that
    /// carries markers for several vendors resolves to the earliest.
    pub fn from_name(name: &str) -> Self {
        const NVIDIA_MARKERS: [&str; 6] = ["nvidia", "geforce", "quadro", "tesla", "rtx", "gtx"];
        const AMD_MARKERS: [&str; 5] = ["amd", "radeon", "rx ", "vega", "instinct"];
        const INTEL_MARKERS: [&str; 5] = ["intel", "arc ", "iris", "uhd graphics", "hd graphics"];
        // Bare Apple Silicon chip names ("M3 Max") only count when
        // they lead the string.
        const APPLE_PREFIXES: [&str; 4] = ["m1", "m2", "m3", "m4"];

        let lower = name.to_ascii_lowercase();
        let mentions = |markers: &[&str]| markers.iter().any(|marker| lower.contains(marker));

        if mentions(&NVIDIA_MARKERS) {
            Self::Nvidia
        } else if mentions(&AMD_MARKERS) {
            Self::Amd
        } else if mentions(&INTEL_MARKERS) {
            Self::Intel
        } else if lower.contains("apple")
            || APPLE_PREFIXES.iter().any(|prefix| lower.starts_with(prefix))
        {
            Self::Apple
        } else {
            Self::Other(name.to_string())
        }
    }
}

impl HardwareInfo {
    /// Collapse `gpu_backend` and `gpu_devices` into a single
    /// [`SupportedAcceleration`] tier.
    ///
    /// A Metal or CUDA backend maps straight to its tier. With the
    /// CPU backend the device list is scanned for a discrete GPU
    /// (as judged by `is_discrete_gpu`, which treats integrated
    /// parts such as `HD Graphics`, `UHD Graphics`, `Iris` and
    /// small-numbered `Vega` APUs as non-discrete); the first hit
    /// yields `UnsupportedDiscreteGpu`, no hit yields `Cpu`.
    pub fn supported_acceleration(&self) -> SupportedAcceleration {
        match self.gpu_backend {
            GpuBackend::Metal => SupportedAcceleration::Apple {
                unified_memory_mb: self.total_ram_mb,
            },
            GpuBackend::Cuda => SupportedAcceleration::Cuda {
                device_memory_mb: self.gpu_memory_mb,
            },
            // Multi-GPU hosts report the first discrete device in
            // list order, which is enough for tiering.
            GpuBackend::Cpu => self
                .gpu_devices
                .iter()
                .find(|device| is_discrete_gpu(&device.vendor, &device.name))
                .map_or(SupportedAcceleration::Cpu, |device| {
                    SupportedAcceleration::UnsupportedDiscreteGpu {
                        vendor: device.vendor.clone(),
                        name: device.name.clone(),
                        memory_mb: device.memory_mb,
                    }
                }),
        }
    }

    /// Probe the host and derive model/context recommendations.
    ///
    /// Never fails: every probe has a fallback (1 core, 8192 MB of
    /// RAM, CPU backend, empty device list).
    pub fn detect() -> Self {
        /// Share of the available memory budgeted for model weights.
        const MODEL_WEIGHT_SHARE: f64 = 0.6;
        /// Memory set aside for the embedding model family, in MB.
        const EMBEDDING_MODEL_MB: u64 = 700;

        let os = detect_os();
        let arch = std::env::consts::ARCH.to_string();
        let cpu_cores = std::thread::available_parallelism().map_or(1, |n| n.get());
        let total_ram_mb = detect_ram_mb();
        let gpu_backend = detect_gpu_backend();
        let gpu_memory_mb = detect_gpu_memory_mb(&gpu_backend, total_ram_mb);
        let gpu_devices = detect_gpu_devices();

        // Memory the model can live in: the GPU figure when the
        // backend reported one, otherwise system RAM. Whatever is
        // left after weights and the embedding reserve goes to the
        // KV cache, activations, and the OS.
        let available_mb = match gpu_memory_mb {
            Some(mb) => mb,
            None => total_ram_mb,
        };
        let weight_budget_mb = (available_mb as f64 * MODEL_WEIGHT_SHARE) as u64;
        let max_model_mb = weight_budget_mb.saturating_sub(EMBEDDING_MODEL_MB);

        let recommended_model = recommend_model(max_model_mb);
        let recommended_context = recommend_context(available_mb, &recommended_model);

        Self {
            os,
            arch,
            cpu_cores,
            total_ram_mb,
            gpu_backend,
            gpu_memory_mb,
            gpu_devices,
            recommended_model,
            recommended_context,
            max_model_mb,
        }
    }
}

/// Name of the operating system this binary was compiled for.
///
/// `std::env::consts::OS` already spells the three mainstream targets
/// as `"macos"`, `"linux"` and `"windows"`, and names every other
/// target itself, so its value is returned directly.
fn detect_os() -> String {
    std::env::consts::OS.to_owned()
}

/// Total system RAM in megabytes.
///
/// macOS asks `sysctl -n hw.memsize` (bytes); Linux reads the
/// `MemTotal:` row of `/proc/meminfo` (kilobytes). Any other platform,
/// or any failure along the way, yields the 8192 MB fallback.
fn detect_ram_mb() -> u64 {
    #[cfg(target_os = "macos")]
    {
        let reported_bytes = std::process::Command::new("sysctl")
            .args(["-n", "hw.memsize"])
            .output()
            .ok()
            .and_then(|out| String::from_utf8(out.stdout).ok())
            .and_then(|text| text.trim().parse::<u64>().ok());
        if let Some(bytes) = reported_bytes {
            return bytes / (1024 * 1024);
        }
    }

    #[cfg(target_os = "linux")]
    {
        // The value is the second whitespace-separated token of the
        // first `MemTotal:` row that parses as an integer.
        let total_kb = std::fs::read_to_string("/proc/meminfo")
            .ok()
            .and_then(|meminfo| {
                meminfo
                    .lines()
                    .filter(|row| row.starts_with("MemTotal:"))
                    .find_map(|row| row.split_whitespace().nth(1)?.parse::<u64>().ok())
            });
        if let Some(kb) = total_kb {
            return kb / 1024;
        }
    }

    // Nothing usable was reported: assume 8 GB.
    8192
}

/// Heuristic — is this a discrete GPU worth tiering above CPU?
///
/// Integrated graphics (Intel HD/UHD/Iris, AMD APU iGPUs) share
/// system RAM and don't currently warrant a routing tier separate
/// from CPU even when a backend would technically run on them.
/// Discrete GPUs (RTX, RX 7000-series, Arc A-series, Radeon Pro,
/// etc.) have dedicated VRAM and the routing logic wants to know.
///
/// Per-vendor rules:
/// - Apple: never discrete (unified memory; handled by the Metal tier).
/// - Intel: discrete only when the name contains `arc ` followed by
///   more text.
/// - AMD: discrete unless the name is the generic "Radeon Graphics"
///   label or carries an APU-sized Vega number.
/// - NVIDIA and unknown vendors: always treated as discrete.
fn is_discrete_gpu(vendor: &GpuVendor, name: &str) -> bool {
    // Strip vendor trademark noise (`(TM)`, `(R)`, `™`, `®`) before
    // matching so heuristics work on the wire form WMIC actually
    // emits ("AMD Radeon(TM) Graphics", "Intel(R) UHD Graphics 630").
    let stripped = name
        .to_ascii_lowercase()
        .replace("(tm)", "")
        .replace("(r)", "")
        .replace('™', "")
        .replace('®', "");
    // Collapse whitespace runs: removing a free-standing "(TM)" leaves
    // a double space that would otherwise defeat the multi-word
    // markers below ("AMD Radeon (TM) Graphics").
    let tokens: Vec<&str> = stripped.split_whitespace().collect();
    let normalized = tokens.join(" ");

    match vendor {
        // Apple GPUs are always integrated unified-memory; the caller
        // already routes those through the Metal tier.
        GpuVendor::Apple => false,
        // Intel: integrated unless explicitly Arc.
        GpuVendor::Intel => normalized.contains("arc "),
        // AMD: APU iGPUs report a small Vega number or the generic
        // "AMD Radeon Graphics" label with no model number. Discrete
        // cards are RX, Radeon Pro, Instinct, or W-series.
        GpuVendor::Amd => {
            const INTEGRATED_VEGA_SIZES: [&str; 7] = ["3", "6", "7", "8", "9", "10", "11"];
            // Compare the whole token after "vega": a substring test
            // for "vega 6" would also swallow the discrete "Vega 64".
            let integrated_vega = tokens.windows(2).any(|pair| {
                matches!(pair, ["vega", size] if INTEGRATED_VEGA_SIZES.contains(size))
            });
            // Generic APU label, post trademark-strip.
            let generic_apu_label = normalized.contains("radeon graphics");
            !(integrated_vega || generic_apu_label)
        }
        // NVIDIA cards are essentially always discrete (Tegra mobile
        // SoCs aren't a desktop scenario CAR targets). Unknown vendors
        // are treated optimistically; routing can fall back to CPU at
        // execute time if the device is actually unusable.
        GpuVendor::Nvidia | GpuVendor::Other(_) => true,
    }
}

/// Pick the inference backend from the compiled-in cargo features.
///
/// `metal` wins over `cuda` when both are enabled. With neither
/// feature the answer is `Cpu`, even on Apple Silicon: claiming Metal
/// without the backend compiled in would lie to callers reading
/// `HardwareInfo::gpu_backend`, and model loading would then fail at
/// runtime. Per #93: report what we can actually drive, not what the
/// OS exposes.
fn detect_gpu_backend() -> GpuBackend {
    if cfg!(feature = "metal") {
        GpuBackend::Metal
    } else if cfg!(feature = "cuda") {
        GpuBackend::Cuda
    } else {
        GpuBackend::Cpu
    }
}

/// Memory in MB that the given backend can use for inference.
///
/// - `Cpu`: `None`.
/// - `Metal`: `total_ram_mb` minus 4096 (saturating). Apple Silicon
///   has unified memory, and macOS keeps roughly 2-4 GB for itself.
/// - `Cuda`: the first line of `nvidia-smi`'s `memory.total` query on
///   Linux and Windows; `None` on other platforms or when the tool is
///   missing or its output does not parse.
fn detect_gpu_memory_mb(backend: &GpuBackend, total_ram_mb: u64) -> Option<u64> {
    match backend {
        GpuBackend::Cpu => None,
        GpuBackend::Metal => Some(total_ram_mb.saturating_sub(4096)),
        GpuBackend::Cuda => {
            // nvidia-smi ships with the NVIDIA driver on Linux and
            // Windows and takes the same arguments on both. Multi-GPU
            // hosts print one line per device; only the first is used
            // because the backend targets one device at a time.
            #[cfg(any(target_os = "linux", target_os = "windows"))]
            let first_device_mb = std::process::Command::new("nvidia-smi")
                .args(["--query-gpu=memory.total", "--format=csv,noheader,nounits"])
                .output()
                .ok()
                .and_then(|out| String::from_utf8(out.stdout).ok())
                .and_then(|text| {
                    text.lines()
                        .next()
                        .and_then(|row| row.trim().parse::<u64>().ok())
                });

            // No nvidia-smi on the remaining platforms (macOS dropped
            // CUDA support years ago).
            #[cfg(not(any(target_os = "linux", target_os = "windows")))]
            let first_device_mb: Option<u64> = None;

            first_device_mb
        }
    }
}

// ---------------------------------------------------------------------------
// GPU enumeration — vendor-agnostic, reports devices the OS sees regardless
// of whether CAR has an inference backend that can drive them.
// ---------------------------------------------------------------------------

/// Enumerate the GPUs the OS reports by dispatching to the probe for
/// the compile target. Targets without a probe get an empty list.
fn detect_gpu_devices() -> Vec<GpuDevice> {
    #[cfg(target_os = "linux")]
    let devices = detect_gpu_devices_linux();

    #[cfg(target_os = "windows")]
    let devices = detect_gpu_devices_windows();

    #[cfg(target_os = "macos")]
    let devices = detect_gpu_devices_macos();

    #[cfg(not(any(target_os = "linux", target_os = "windows", target_os = "macos")))]
    let devices = Vec::new();

    devices
}

/// Enumerate GPUs from `/sys/class/drm`.
///
/// Only the `cardN` entries are considered; the per-connector
/// entries (`card0-HDMI-A-1`, `card0-eDP-1`, ...) point back at the
/// same card. Cards are visited in ascending index order so the
/// resulting list — and therefore "the first device" — is stable
/// across runs, and two entries that resolve to the same underlying
/// device are reported once. Returns an empty list when the
/// directory cannot be read.
#[cfg(target_os = "linux")]
fn detect_gpu_devices_linux() -> Vec<GpuDevice> {
    let entries = match std::fs::read_dir("/sys/class/drm") {
        Ok(entries) => entries,
        Err(_) => return Vec::new(),
    };

    // (card index, entry name, path of the `device` link)
    let mut cards: Vec<(u32, String, std::path::PathBuf)> = entries
        .flatten()
        .filter_map(|entry| {
            let label = entry.file_name().to_string_lossy().into_owned();
            // Names without a numeric suffix sort last instead of
            // being dropped.
            let index = label
                .strip_prefix("card")?
                .parse::<u32>()
                .unwrap_or(u32::MAX);
            if label.contains('-') {
                return None;
            }
            Some((index, label, entry.path().join("device")))
        })
        .collect();
    // `read_dir` yields entries in no particular order.
    cards.sort();

    let mut seen = std::collections::HashSet::new();
    let mut out = Vec::with_capacity(cards.len());
    for (_, label, device_path) in cards {
        // Key the dedup on the resolved device path; the `cardN`
        // path itself is unique per entry and would never collide.
        let resolved =
            std::fs::canonicalize(&device_path).unwrap_or_else(|_| device_path.clone());
        if !seen.insert(resolved) {
            continue;
        }

        let vendor = std::fs::read_to_string(device_path.join("vendor"))
            .ok()
            .map(|raw| GpuVendor::from_pci_id(&raw))
            .unwrap_or_else(|| GpuVendor::Other("unknown".into()));
        let name = read_drm_device_name(&device_path).unwrap_or(label);
        out.push(GpuDevice {
            vendor,
            name,
            memory_mb: read_drm_memory_mb(&device_path),
        });
    }
    out
}

/// Best-effort display name for a DRM device directory.
///
/// Prefers the contents of `product_name` (exposed by AMD; NVIDIA and
/// Intel generally don't provide it) when it is non-empty after
/// trimming. Otherwise falls back to `"PCI device <id>"` built from
/// the `device` file, or `None` when that file is unreadable too.
#[cfg(target_os = "linux")]
fn read_drm_device_name(device: &std::path::Path) -> Option<String> {
    let product_name = std::fs::read_to_string(device.join("product_name"))
        .ok()
        .map(|raw| raw.trim().to_string())
        .filter(|label| !label.is_empty());

    product_name.or_else(|| {
        std::fs::read_to_string(device.join("device"))
            .ok()
            .map(|pci_id| format!("PCI device {}", pci_id.trim()))
    })
}

/// VRAM size in MB for a DRM device directory, when sysfs exposes it.
///
/// Reads `mem_info_vram_total` (a byte count, provided by AMD).
/// NVIDIA needs nvidia-smi (handled separately by
/// `detect_gpu_memory_mb` for the CUDA backend), and Intel iGPUs
/// share system RAM without a fixed budget, so both yield `None`
/// here — as does a file that is missing or does not parse.
#[cfg(target_os = "linux")]
fn read_drm_memory_mb(device: &std::path::Path) -> Option<u64> {
    let raw = std::fs::read_to_string(device.join("mem_info_vram_total")).ok()?;
    let vram_bytes = raw.trim().parse::<u64>().ok()?;
    Some(vram_bytes / (1024 * 1024))
}

/// Enumerate GPUs through WMI's `Win32_VideoController` class.
///
/// `wmic` is tried first: it is deprecated in modern Win 11 but
/// often still present. PowerShell's `Get-CimInstance` is the modern
/// path and is used whenever wmic cannot be launched *or* produces no
/// devices (for example when it exits with an error and prints
/// nothing parseable). Returns an empty list when both probes come up
/// empty.
///
/// Output is decoded lossily so one stray non-UTF-8 byte does not
/// discard every device.
#[cfg(target_os = "windows")]
fn detect_gpu_devices_windows() -> Vec<GpuDevice> {
    let from_wmic = std::process::Command::new("wmic")
        .args([
            "path",
            "Win32_VideoController",
            "get",
            "Name,AdapterRAM",
            "/format:list",
        ])
        .output()
        .ok()
        .map(|out| parse_windows_wmic_list(&String::from_utf8_lossy(&out.stdout)))
        .unwrap_or_default();
    if !from_wmic.is_empty() {
        return from_wmic;
    }

    std::process::Command::new("powershell")
        .args([
            "-NoProfile",
            "-Command",
            "Get-CimInstance Win32_VideoController | Select-Object Name,AdapterRAM | ConvertTo-Csv -NoTypeInformation",
        ])
        .output()
        .ok()
        .map(|out| parse_windows_wmic_csv(&String::from_utf8_lossy(&out.stdout)))
        .unwrap_or_default()
}

/// Enumerate GPUs by running `system_profiler SPDisplaysDataType -json`
/// and handing its stdout to `parse_macos_system_profiler_json`.
///
/// Returns an empty list when the command cannot be run or its output
/// is not valid UTF-8.
#[cfg(target_os = "macos")]
fn detect_gpu_devices_macos() -> Vec<GpuDevice> {
    std::process::Command::new("system_profiler")
        .args(["SPDisplaysDataType", "-json"])
        .output()
        .ok()
        .and_then(|out| String::from_utf8(out.stdout).ok())
        .map(|report| parse_macos_system_profiler_json(&report))
        .unwrap_or_default()
}

// Parsers separated from the I/O so they're unit-testable without
// shelling out to the OS.

/// Parse `wmic ... /format:list` output into GPU devices.
///
/// Records are runs of `Key=Value` lines separated by blank lines.
/// Only `Name=` and `AdapterRAM=` (a byte count) are read; a record
/// without a name is dropped, and a missing or unparseable
/// `AdapterRAM` leaves `memory_mb` as `None`. Lines are trimmed, so
/// CRLF endings are fine.
#[allow(dead_code)] // referenced from cfg(target_os = "windows") and from tests
fn parse_windows_wmic_list(stdout: &str) -> Vec<GpuDevice> {
    let mut devices = Vec::new();
    let mut pending_name: Option<String> = None;
    let mut pending_ram_bytes: Option<u64> = None;

    // The extra empty line at the end flushes a final record that is
    // not followed by a blank separator.
    for raw_line in stdout.lines().chain(std::iter::once("")) {
        let row = raw_line.trim();

        if !row.is_empty() {
            if let Some(value) = row.strip_prefix("Name=") {
                pending_name = Some(value.to_string());
            } else if let Some(value) = row.strip_prefix("AdapterRAM=") {
                pending_ram_bytes = value.parse::<u64>().ok();
            }
            continue;
        }

        // Blank line: the record is complete. The RAM value is reset
        // whether or not a name was seen.
        let ram_bytes = pending_ram_bytes.take();
        if let Some(device_name) = pending_name.take() {
            devices.push(GpuDevice {
                vendor: GpuVendor::from_name(&device_name),
                name: device_name,
                memory_mb: ram_bytes.map(|bytes| bytes / (1024 * 1024)),
            });
        }
    }

    devices
}

/// Parse the CSV emitted by
/// `Get-CimInstance Win32_VideoController | ... | ConvertTo-Csv`.
///
/// The first non-blank line is the header; the `Name` and
/// `AdapterRAM` (bytes) columns are located by header text, so column
/// order does not matter. Rows with an empty or missing name are
/// skipped, and an unparseable `AdapterRAM` leaves `memory_mb` as
/// `None`. Quoted fields may contain commas.
#[allow(dead_code)] // referenced from cfg(target_os = "windows") and from tests
fn parse_windows_wmic_csv(stdout: &str) -> Vec<GpuDevice> {
    let mut records = stdout
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty())
        .map(split_csv_record);

    let header = match records.next() {
        Some(header) => header,
        None => return Vec::new(),
    };
    let name_col = header.iter().position(|title| title.as_str() == "Name");
    let ram_col = header.iter().position(|title| title.as_str() == "AdapterRAM");

    records
        .filter_map(|fields| {
            let name = name_col
                .and_then(|col| fields.get(col))
                .filter(|value| !value.is_empty())?
                .clone();
            let ram_bytes = ram_col
                .and_then(|col| fields.get(col))
                .and_then(|value| value.parse::<u64>().ok());
            Some(GpuDevice {
                vendor: GpuVendor::from_name(&name),
                name,
                memory_mb: ram_bytes.map(|bytes| bytes / (1024 * 1024)),
            })
        })
        .collect()
}

/// Split one CSV record into trimmed fields, honouring double-quoted
/// fields (which may contain commas) and `""` as an escaped quote.
#[allow(dead_code)] // only reached through parse_windows_wmic_csv
fn split_csv_record(line: &str) -> Vec<String> {
    let mut fields = Vec::new();
    let mut current = String::new();
    let mut in_quotes = false;
    let mut chars = line.chars().peekable();

    while let Some(ch) = chars.next() {
        match ch {
            // A doubled quote inside a quoted field is a literal quote.
            '"' if in_quotes && chars.peek() == Some(&'"') => {
                current.push('"');
                chars.next();
            }
            '"' => in_quotes = !in_quotes,
            ',' if !in_quotes => {
                fields.push(current.trim().to_string());
                current.clear();
            }
            other => current.push(other),
        }
    }
    fields.push(current.trim().to_string());
    fields
}

/// Parse `system_profiler SPDisplaysDataType -json` output.
///
/// The expected shape is:
/// `{ "SPDisplaysDataType": [ { "_name": "Apple M3 Max", "spdisplays_vram_shared": "...", ... } ] }`
///
/// Entries without a non-empty `_name` string are skipped. Invalid
/// JSON, or JSON without a `SPDisplaysDataType` array, yields an
/// empty list.
#[allow(dead_code)]
fn parse_macos_system_profiler_json(stdout: &str) -> Vec<GpuDevice> {
    let report = serde_json::from_str::<serde_json::Value>(stdout).ok();
    let displays = report
        .as_ref()
        .and_then(|root| root.get("SPDisplaysDataType"))
        .and_then(|section| section.as_array());

    displays
        .into_iter()
        .flatten()
        .filter_map(|display| {
            let device_name = display
                .get("_name")
                .and_then(|value| value.as_str())
                .filter(|text| !text.is_empty())?;

            // The VRAM key varies by Mac generation: `spdisplays_vram`
            // (discrete) or `spdisplays_vram_shared` (Apple Silicon
            // unified). Both hold human strings like "8 GB" or
            // "Up to 96 GB", so they are parsed loosely. The shared
            // key is consulted only when the first key is absent.
            let memory_mb = display
                .get("spdisplays_vram")
                .or_else(|| display.get("spdisplays_vram_shared"))
                .and_then(|value| value.as_str())
                .and_then(parse_human_memory_mb);

            Some(GpuDevice {
                vendor: GpuVendor::from_name(device_name),
                name: device_name.to_string(),
                memory_mb,
            })
        })
        .collect()
}

/// Parse a human-readable memory size into whole megabytes.
///
/// Accepts strings such as `"8 GB"`, `"Up to 96 GB"`, `"1024 MB"` or
/// `"1.5 GB"`: the first number in the string (with an optional
/// decimal fraction, kept to three places) followed by a unit that
/// starts with `KB`, `MB`, `GB` or `TB` in any letter case. Units are
/// binary multiples (1 GB = 1024 MB) and the result is rounded down.
///
/// Returns `None` when there is no number, the unit is missing or
/// unknown, or the size does not fit in a `u64`.
fn parse_human_memory_mb(s: &str) -> Option<u64> {
    let leading_digits = |text: &str| text.bytes().take_while(u8::is_ascii_digit).count();

    // Integer part: the first contiguous run of ASCII digits.
    let number_start = s.find(|c: char| c.is_ascii_digit())?;
    let from_number = &s[number_start..];
    let whole_len = leading_digits(from_number);
    let whole: u64 = from_number[..whole_len].parse().ok()?;
    let mut rest = &from_number[whole_len..];

    // Optional fraction, tracked in thousandths so "1.5 GB" is read
    // as 1.5 rather than having its digits glued together into 15.
    let mut thousandths: u128 = 0;
    if let Some(after_point) = rest.strip_prefix('.') {
        let frac_len = leading_digits(after_point);
        if frac_len > 0 {
            let mut place: u128 = 100;
            for digit in after_point[..frac_len].bytes().take(3) {
                thousandths += u128::from(digit - b'0') * place;
                place /= 10;
            }
            rest = &after_point[frac_len..];
        }
    }

    // Unit: the alphabetic run after any separators (spaces, dashes).
    let unit: String = rest
        .trim_start_matches(|c: char| !c.is_ascii_alphanumeric())
        .chars()
        .take_while(char::is_ascii_alphabetic)
        .map(|c| c.to_ascii_uppercase())
        .collect();
    let (multiplier, divisor): (u128, u128) = if unit.starts_with("TB") {
        (1024 * 1024, 1)
    } else if unit.starts_with("GB") {
        (1024, 1)
    } else if unit.starts_with("MB") {
        (1, 1)
    } else if unit.starts_with("KB") {
        (1, 1024)
    } else {
        return None;
    };

    // Widen to u128 so the multiplication cannot overflow, then
    // reject results that do not fit the return type.
    let total_thousandths = u128::from(whole) * 1000 + thousandths;
    u64::try_from(total_thousandths * multiplier / (1000 * divisor)).ok()
}

/// Pick the largest model whose size threshold fits in `max_model_mb`.
///
/// Apple Silicon macOS builds (unless compiled with the
/// `car_skip_mlx` cfg) recommend the `-MLX` variants and use a lower
/// threshold for the 1.7B model; every other build recommends the
/// plain names. Budgets below the smallest threshold get the 0.6B
/// model.
fn recommend_model(max_model_mb: u64) -> String {
    // (minimum budget in MB, model name), largest model first.
    #[cfg(all(target_os = "macos", target_arch = "aarch64", not(car_skip_mlx)))]
    const TIERS: [(u64, &str); 4] = [
        (17000, "Qwen3-30B-A3B-MLX"),
        (4900, "Qwen3-8B-MLX"),
        (2500, "Qwen3-4B-MLX"),
        (800, "Qwen3-1.7B-MLX"),
    ];
    #[cfg(all(target_os = "macos", target_arch = "aarch64", not(car_skip_mlx)))]
    const SMALLEST: &str = "Qwen3-0.6B-MLX";

    #[cfg(not(all(target_os = "macos", target_arch = "aarch64", not(car_skip_mlx))))]
    const TIERS: [(u64, &str); 4] = [
        (17000, "Qwen3-30B-A3B"),
        (4900, "Qwen3-8B"),
        (2500, "Qwen3-4B"),
        (1800, "Qwen3-1.7B"),
    ];
    #[cfg(not(all(target_os = "macos", target_arch = "aarch64", not(car_skip_mlx))))]
    const SMALLEST: &str = "Qwen3-0.6B";

    TIERS
        .iter()
        .find(|(floor_mb, _)| max_model_mb >= *floor_mb)
        .map_or(SMALLEST, |&(_, model)| model)
        .to_string()
}

/// Recommend a context length (in tokens) for `model_name` given
/// `available_mb` of memory.
///
/// The KV budget is what remains after subtracting the model's weight
/// size and a fixed 1024 MB reserve (both saturating at zero). That
/// budget is divided by the model's per-1k-token KV cost and scaled
/// by 1000, then limited to the range 2048..=131072. Unknown model
/// names use the 0.6B figures.
fn recommend_context(available_mb: u64, model_name: &str) -> usize {
    const RESERVED_MB: u64 = 1024;
    const MIN_CONTEXT: usize = 2048;
    const MAX_CONTEXT: usize = 131072;

    // (weight size in MB, KV cost per 1k tokens)
    // NOTE(review): the cost is divided into a budget expressed in MB,
    // i.e. it is applied as MB per 1,000 tokens — confirm that is the
    // unit these figures were measured in.
    let (weights_mb, kv_cost_per_1k): (u64, f64) = match model_name {
        "Qwen3-0.6B" => (650, 0.1),
        "Qwen3-0.6B-MLX" => (500, 0.1),
        "Qwen3-1.7B" => (1800, 0.3),
        "Qwen3-1.7B-MLX" => (800, 0.3),
        "Qwen3-4B" => (2500, 0.5),
        "Qwen3-4B-MLX" => (2400, 0.5),
        "Qwen3-8B" => (4900, 1.0),
        "Qwen3-8B-MLX" => (4800, 1.0),
        "Qwen3-30B-A3B" => (17000, 1.5),
        "Qwen3-30B-A3B-MLX" => (16500, 1.5),
        _ => (650, 0.1),
    };

    let kv_budget_mb = available_mb
        .saturating_sub(weights_mb)
        .saturating_sub(RESERVED_MB) as f64;
    let uncapped_tokens = (kv_budget_mb / kv_cost_per_1k * 1000.0) as usize;

    uncapped_tokens.max(MIN_CONTEXT).min(MAX_CONTEXT)
}

// Unit tests for vendor mapping, the OS-output parsers, the discrete-GPU
// heuristic, and acceleration tiering. Apart from the macOS-only live
// probe, every test runs on canned input and does no I/O.
#[cfg(test)]
mod tests {
    use super::*;

    // Known PCI vendor IDs map to their vendors; the hex digits are
    // matched case-insensitively.
    #[test]
    fn pci_id_to_vendor_known() {
        assert_eq!(GpuVendor::from_pci_id("0x1002"), GpuVendor::Amd);
        assert_eq!(GpuVendor::from_pci_id("0x10de"), GpuVendor::Nvidia);
        assert_eq!(GpuVendor::from_pci_id("0x8086"), GpuVendor::Intel);
        assert_eq!(GpuVendor::from_pci_id("0x10DE"), GpuVendor::Nvidia);
    }

    // Unknown IDs land in `Other` with the `0x` prefix removed.
    #[test]
    fn pci_id_to_vendor_unknown_falls_through() {
        match GpuVendor::from_pci_id("0xabcd") {
            GpuVendor::Other(s) => assert_eq!(s, "abcd"),
            other => panic!("expected Other, got {other:?}"),
        }
    }

    // "Radeon" alone is enough to identify AMD.
    #[test]
    fn name_to_vendor_picks_amd_radeon() {
        assert_eq!(
            GpuVendor::from_name("AMD Radeon RX 7900 XTX"),
            GpuVendor::Amd
        );
        assert_eq!(GpuVendor::from_name("Radeon Pro 580X"), GpuVendor::Amd);
    }

    // Product-line names (Quadro) identify NVIDIA without the brand.
    #[test]
    fn name_to_vendor_picks_nvidia() {
        assert_eq!(
            GpuVendor::from_name("NVIDIA GeForce RTX 4090"),
            GpuVendor::Nvidia
        );
        assert_eq!(GpuVendor::from_name("Quadro P2200"), GpuVendor::Nvidia);
    }

    // Both discrete (Arc) and integrated (Iris) Intel names map to Intel.
    #[test]
    fn name_to_vendor_picks_intel_arc_and_iris() {
        assert_eq!(GpuVendor::from_name("Intel Arc A770"), GpuVendor::Intel);
        assert_eq!(
            GpuVendor::from_name("Intel Iris Xe Graphics"),
            GpuVendor::Intel
        );
    }

    #[test]
    fn name_to_vendor_picks_apple_silicon() {
        assert_eq!(GpuVendor::from_name("Apple M3 Max"), GpuVendor::Apple);
    }

    // Two blank-line-separated records yield two devices, with
    // AdapterRAM converted from bytes to MB.
    #[test]
    fn parse_wmic_list_two_devices() {
        // Real wmic /format:list output has CRLF + blank line separators.
        let stdout = "\r\nName=NVIDIA GeForce RTX 4090\r\nAdapterRAM=25756221440\r\n\r\nName=Intel UHD Graphics 770\r\nAdapterRAM=1073741824\r\n\r\n";
        let devices = parse_windows_wmic_list(stdout);
        assert_eq!(devices.len(), 2);
        assert_eq!(devices[0].vendor, GpuVendor::Nvidia);
        assert_eq!(devices[0].name, "NVIDIA GeForce RTX 4090");
        assert_eq!(devices[0].memory_mb, Some(25756221440 / (1024 * 1024)));
        assert_eq!(devices[1].vendor, GpuVendor::Intel);
        assert_eq!(devices[1].memory_mb, Some(1024));
    }

    #[test]
    fn parse_wmic_list_handles_missing_ram() {
        // Some virtual adapters omit AdapterRAM. memory_mb stays None.
        let stdout = "Name=Microsoft Basic Display Adapter\r\n\r\n";
        let devices = parse_windows_wmic_list(stdout);
        assert_eq!(devices.len(), 1);
        assert_eq!(devices[0].memory_mb, None);
    }

    // Apple Silicon reports unified memory under `spdisplays_vram_shared`.
    #[test]
    fn parse_macos_system_profiler_apple_silicon() {
        let stdout = r#"{
          "SPDisplaysDataType": [
            {
              "_name": "Apple M3 Max",
              "spdisplays_vram_shared": "Up to 96 GB"
            }
          ]
        }"#;
        let devices = parse_macos_system_profiler_json(stdout);
        assert_eq!(devices.len(), 1);
        assert_eq!(devices[0].vendor, GpuVendor::Apple);
        assert_eq!(devices[0].memory_mb, Some(96 * 1024));
    }

    // Discrete GPUs report dedicated memory under `spdisplays_vram`.
    #[test]
    fn parse_macos_system_profiler_discrete_amd() {
        let stdout = r#"{
          "SPDisplaysDataType": [
            {
              "_name": "AMD Radeon Pro 5500M",
              "spdisplays_vram": "8 GB"
            }
          ]
        }"#;
        let devices = parse_macos_system_profiler_json(stdout);
        assert_eq!(devices.len(), 1);
        assert_eq!(devices[0].vendor, GpuVendor::Amd);
        assert_eq!(devices[0].memory_mb, Some(8 * 1024));
    }

    // Invalid JSON and JSON without the expected key both yield no devices.
    #[test]
    fn parse_macos_system_profiler_handles_garbage() {
        assert!(parse_macos_system_profiler_json("not json").is_empty());
        assert!(parse_macos_system_profiler_json("{}").is_empty());
    }

    #[cfg(target_os = "macos")]
    #[test]
    fn live_macos_detection_returns_at_least_one_device() {
        // system_profiler is part of the base macOS install — not
        // gated by Xcode tools. Should always return at least the
        // built-in GPU on a Mac.
        let devices = detect_gpu_devices_macos();
        assert!(
            !devices.is_empty(),
            "expected at least one GPU device on macOS",
        );
    }

    // Sizes are returned in MB; strings without a number yield None.
    #[test]
    fn human_memory_parser() {
        assert_eq!(parse_human_memory_mb("8 GB"), Some(8 * 1024));
        assert_eq!(parse_human_memory_mb("Up to 96 GB"), Some(96 * 1024));
        assert_eq!(parse_human_memory_mb("1024 MB"), Some(1024));
        assert_eq!(parse_human_memory_mb("nope"), None);
    }

    // Fixture: a 32 000 MB Linux x86_64 host with the given backend and
    // device list. `gpu_memory_mb` starts as `None`; tests that need it
    // set it afterwards.
    fn hw_with_devices(backend: GpuBackend, devices: Vec<GpuDevice>) -> HardwareInfo {
        HardwareInfo {
            os: "linux".into(),
            arch: "x86_64".into(),
            cpu_cores: 8,
            total_ram_mb: 32_000,
            gpu_backend: backend,
            gpu_memory_mb: None,
            gpu_devices: devices,
            recommended_model: String::new(),
            recommended_context: 4096,
            max_model_mb: 0,
        }
    }

    #[test]
    fn discrete_gpu_heuristic() {
        // NVIDIA is always discrete in CAR's target.
        assert!(is_discrete_gpu(&GpuVendor::Nvidia, "GeForce RTX 4090"));

        // AMD: discrete cards yes, APU iGPUs no.
        assert!(is_discrete_gpu(&GpuVendor::Amd, "Radeon RX 7900 XTX"));
        assert!(is_discrete_gpu(&GpuVendor::Amd, "Radeon Pro W7900"));
        assert!(!is_discrete_gpu(&GpuVendor::Amd, "Radeon Vega 8 Graphics"));
        assert!(!is_discrete_gpu(&GpuVendor::Amd, "AMD Radeon(TM) Graphics"));

        // Intel: only Arc counts as discrete.
        assert!(is_discrete_gpu(
            &GpuVendor::Intel,
            "Intel Arc A770 Graphics"
        ));
        assert!(!is_discrete_gpu(
            &GpuVendor::Intel,
            "Intel(R) UHD Graphics 630"
        ));
        assert!(!is_discrete_gpu(
            &GpuVendor::Intel,
            "Intel Iris Xe Graphics"
        ));

        // Apple GPUs are unified-memory, never "discrete" in this sense.
        assert!(!is_discrete_gpu(&GpuVendor::Apple, "Apple M3 Max"));
    }

    // The Metal backend maps to the Apple tier and carries total system RAM.
    #[test]
    fn supported_acceleration_metal_apple_silicon() {
        let hw = hw_with_devices(
            GpuBackend::Metal,
            vec![GpuDevice {
                vendor: GpuVendor::Apple,
                name: "Apple M3".into(),
                memory_mb: None,
            }],
        );
        match hw.supported_acceleration() {
            SupportedAcceleration::Apple { unified_memory_mb } => {
                assert_eq!(unified_memory_mb, 32_000);
            }
            other => panic!("expected Apple, got {:?}", other),
        }
    }

    // The CUDA tier reports `gpu_memory_mb` from the struct.
    #[test]
    fn supported_acceleration_cuda_with_memory() {
        let mut hw = hw_with_devices(
            GpuBackend::Cuda,
            vec![GpuDevice {
                vendor: GpuVendor::Nvidia,
                name: "GeForce RTX 4090".into(),
                memory_mb: Some(24_000),
            }],
        );
        hw.gpu_memory_mb = Some(24_000);
        match hw.supported_acceleration() {
            SupportedAcceleration::Cuda { device_memory_mb } => {
                assert_eq!(device_memory_mb, Some(24_000));
            }
            other => panic!("expected Cuda, got {:?}", other),
        }
    }

    // CPU backend plus a discrete AMD card surfaces the unsupported-GPU
    // tier with that device's details.
    #[test]
    fn supported_acceleration_unsupported_amd_discrete() {
        let hw = hw_with_devices(
            GpuBackend::Cpu,
            vec![GpuDevice {
                vendor: GpuVendor::Amd,
                name: "Radeon RX 7900 XTX".into(),
                memory_mb: Some(24_000),
            }],
        );
        match hw.supported_acceleration() {
            SupportedAcceleration::UnsupportedDiscreteGpu {
                vendor,
                name,
                memory_mb,
            } => {
                assert_eq!(vendor, GpuVendor::Amd);
                assert!(name.contains("7900"));
                assert_eq!(memory_mb, Some(24_000));
            }
            other => panic!("expected UnsupportedDiscreteGpu, got {:?}", other),
        }
    }

    #[test]
    fn supported_acceleration_integrated_falls_to_cpu() {
        // Intel UHD Graphics is integrated — same tier as CPU-only.
        let hw = hw_with_devices(
            GpuBackend::Cpu,
            vec![GpuDevice {
                vendor: GpuVendor::Intel,
                name: "Intel(R) UHD Graphics 630".into(),
                memory_mb: None,
            }],
        );
        assert_eq!(hw.supported_acceleration(), SupportedAcceleration::Cpu);
    }

    // An empty device list on the CPU backend is plain CPU.
    #[test]
    fn supported_acceleration_no_gpu_at_all() {
        let hw = hw_with_devices(GpuBackend::Cpu, vec![]);
        assert_eq!(hw.supported_acceleration(), SupportedAcceleration::Cpu);
    }
}