mermaid-cli 0.7.1

use crate::constants::{DEFAULT_MAX_TOKENS, DEFAULT_OLLAMA_PORT, DEFAULT_TEMPERATURE};
use crate::models::ReasoningLevel;
use anyhow::{Context, Result};
use directories::ProjectDirs;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::PathBuf;

/// Main configuration structure, serialized to and deserialized from the
/// single `config.toml` file.
///
/// Every field carries `#[serde(default)]`, so a config file that omits a
/// key or an entire section still deserializes; the missing field takes
/// its `Default` value. `Config::default()` is also what `load_config`
/// returns when no config file exists.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Config {
    /// Last used model (persisted between sessions). `resolve_model_id`
    /// prefers this over `default_model` when no CLI model is given.
    #[serde(default)]
    pub last_used_model: Option<String>,

    /// Default model configuration. `resolve_model_id` only uses it when
    /// both `provider` and `name` are non-empty.
    #[serde(default)]
    pub default_model: ModelSettings,

    /// Ollama configuration
    #[serde(default)]
    pub ollama: OllamaConfig,

    /// Non-interactive mode configuration
    #[serde(default)]
    pub non_interactive: NonInteractiveConfig,

    /// MCP server configurations, one entry per `[mcp_servers.<key>]`
    /// table (the key is presumably the server's name — confirm against
    /// the code that consumes this map).
    #[serde(default)]
    pub mcp_servers: HashMap<String, McpServerConfig>,

    /// User overrides + custom OpenAI-compatible providers. Keys are
    /// provider names; matching a built-in registry entry overrides its
    /// defaults, anything else defines a fully custom provider.
    /// Example:
    /// ```toml
    /// [providers.groq]
    /// api_key_env = "MY_GROQ_KEY"  # override default GROQ_API_KEY
    ///
    /// [providers.my-vllm]
    /// base_url = "http://192.168.1.42:8000/v1"
    /// api_key_env = "VLLM_KEY"
    /// compat = "openai-effort"
    /// ```
    #[serde(default)]
    pub providers: HashMap<String, UserProviderConfig>,

    /// Per-model reasoning preferences keyed by full model ID
    /// (`provider/name`). Entries are written by
    /// `persist_reasoning_for_model`, i.e. when the user runs
    /// `/reasoning <level>` or Alt+T cycles while using a specific model —
    /// the new value sticks for that model until changed. Falls back to
    /// `default_model.reasoning` when no entry exists.
    /// Example:
    /// ```toml
    /// [reasoning_per_model]
    /// "anthropic/claude-sonnet-4-6" = "high"
    /// "ollama/qwen3-coder:30b" = "low"
    /// ```
    #[serde(default)]
    pub reasoning_per_model: HashMap<String, ReasoningLevel>,
}

/// User-supplied OpenAI-compatible provider configuration. All fields are
/// optional — when matching a built-in registry entry, only the supplied
/// fields override; the rest fall back to the registry defaults. For
/// fully custom providers, `base_url` and `api_key_env` are required.
///
/// Note that "required" is not enforced by this type: every field has
/// `#[serde(default)]`, so an empty `[providers.<name>]` table still
/// deserializes (to `None` / an empty map). Any validation has to happen
/// where the provider is resolved.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct UserProviderConfig {
    /// Override base URL for `/chat/completions` (None = use built-in
    /// registry default; required for fully custom providers).
    #[serde(default)]
    pub base_url: Option<String>,
    /// Env var name to read the API key from (None = use the built-in
    /// registry default like `GROQ_API_KEY`; required for fully custom
    /// providers). This is the *name* of the variable, not the key itself.
    #[serde(default)]
    pub api_key_env: Option<String>,
    /// Extra HTTP headers sent on every request to this provider.
    /// Defaults to an empty map when omitted.
    #[serde(default)]
    pub extra_headers: HashMap<String, String>,
    /// For fully custom providers (no built-in registry entry), declares
    /// which OpenAI-compatible shape the endpoint speaks. Ignored when
    /// the provider name matches a built-in registry entry. Values:
    /// `"openai"` (no reasoning), `"openai-effort"` (`reasoning_effort`
    /// field), `"openrouter"` (nested `reasoning: {effort}` object).
    /// Stored as a free-form string, so an unrecognized value is not
    /// rejected at deserialization time.
    #[serde(default)]
    pub compat: Option<String>,
    /// Optional preferred model — surfaced by `mermaid status` and used
    /// as the default when the user picks this provider with no model
    /// suffix.
    #[serde(default)]
    pub default_model: Option<String>,
}

/// MCP server configuration: how to launch one server process.
///
/// `command` has no serde default and is therefore mandatory in the config
/// file; `args` and `env` may be omitted and default to empty.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct McpServerConfig {
    /// Command to execute (e.g., "npx", "node", "python"). Required.
    pub command: String,
    /// Command-line arguments (empty when omitted)
    #[serde(default)]
    pub args: Vec<String>,
    /// Environment variables for the server process (empty when omitted)
    #[serde(default)]
    pub env: HashMap<String, String>,
}

/// Default model settings.
///
/// The container-level `#[serde(default)]` means any field missing from
/// the `[default_model]` table is filled in from `ModelSettings::default()`
/// instead of failing deserialization. That keeps config files written
/// before a field existed loadable.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct ModelSettings {
    /// Model provider (ollama, openai, anthropic). Empty by default;
    /// `resolve_model_id` treats an empty provider as "not configured".
    pub provider: String,
    /// Model name. Empty by default; `resolve_model_id` treats an empty
    /// name as "not configured".
    pub name: String,
    /// Temperature for generation
    pub temperature: f32,
    /// Maximum tokens to generate
    pub max_tokens: usize,
    /// Default reasoning depth used for new sessions when no `--reasoning`
    /// flag is given. Each adapter snaps this onto the closest level the
    /// model actually supports via `nearest_effort()`.
    pub reasoning: ReasoningLevel,
}

impl Default for ModelSettings {
    fn default() -> Self {
        Self {
            provider: String::new(),
            name: String::new(),
            temperature: DEFAULT_TEMPERATURE,
            max_tokens: DEFAULT_MAX_TOKENS,
            reasoning: ReasoningLevel::default(),
        }
    }
}

/// Ollama configuration.
///
/// The container-level `#[serde(default)]` fills any field missing from
/// the `[ollama]` table from `OllamaConfig::default()`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct OllamaConfig {
    /// Ollama server host (defaults to `localhost`)
    pub host: String,
    /// Ollama server port (defaults to `DEFAULT_OLLAMA_PORT`)
    pub port: u16,
    /// Ollama cloud API key (for :cloud models)
    /// Set this to use Ollama's cloud inference service
    /// Get your key at: https://ollama.com/cloud
    /// NOTE(review): this is the key itself, and it is part of the
    /// serialized struct, so it ends up in plain text in the config file.
    pub cloud_api_key: Option<String>,
    /// Number of GPU layers to offload (None = auto, 0 = CPU only, positive = specific count)
    /// Lower values free up VRAM for larger models at the cost of speed
    pub num_gpu: Option<i32>,
    /// Number of CPU threads for processing offloaded layers
    /// Higher values improve CPU inference speed for large models
    pub num_thread: Option<i32>,
    /// Context window size (number of tokens)
    /// Larger values allow longer conversations but use more memory
    pub num_ctx: Option<i32>,
    /// Enable NUMA optimization for multi-CPU systems
    pub numa: Option<bool>,
}

impl Default for OllamaConfig {
    fn default() -> Self {
        Self {
            host: String::from("localhost"),
            port: DEFAULT_OLLAMA_PORT,
            cloud_api_key: None,
            num_gpu: None,    // Let Ollama auto-detect
            num_thread: None, // Let Ollama auto-detect
            num_ctx: None,    // Use model default
            numa: None,       // Auto-detect
        }
    }
}

/// Non-interactive mode configuration.
///
/// The container-level `#[serde(default)]` fills any field missing from
/// the `[non_interactive]` table from `NonInteractiveConfig::default()`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct NonInteractiveConfig {
    /// Output format (text, json, markdown). Defaults to `text`. Stored as
    /// a free-form string, so other values are not rejected when the
    /// config is parsed.
    pub output_format: String,
    /// Maximum tokens to generate (defaults to `DEFAULT_MAX_TOKENS`)
    pub max_tokens: usize,
    /// Don't execute agent actions (dry run). Defaults to `false`.
    pub no_execute: bool,
}

impl Default for NonInteractiveConfig {
    fn default() -> Self {
        Self {
            output_format: String::from("text"),
            max_tokens: DEFAULT_MAX_TOKENS,
            no_execute: false,
        }
    }
}

/// Load configuration from the single config file.
///
/// Priority: config file > defaults (that's it - no merging, no env vars).
/// When the file does not exist, `Config::default()` is returned.
///
/// # Errors
///
/// Fails if the config path cannot be determined, or if the file exists
/// but cannot be read or parsed as TOML.
pub fn load_config() -> Result<Config> {
    let path = get_config_path()?;

    // No file yet: run on built-in defaults.
    if !path.exists() {
        return Ok(Config::default());
    }

    let contents = std::fs::read_to_string(&path)
        .with_context(|| format!("Failed to read {}", path.display()))?;

    toml::from_str::<Config>(&contents).with_context(|| {
        format!(
            "Failed to parse {}. Run 'mermaid init' to regenerate.",
            path.display()
        )
    })
}

/// Get the path to the single config file: `config.toml` inside the
/// directory returned by `get_config_dir`.
///
/// # Errors
///
/// Propagates any failure from `get_config_dir`.
pub fn get_config_path() -> Result<PathBuf> {
    let dir = get_config_dir()?;
    Ok(dir.join("config.toml"))
}

/// Get the configuration directory, creating it on disk if it is missing.
///
/// The location comes from `ProjectDirs::from("", "", "mermaid")`. When
/// that yields nothing, the fallback is `<home>/.config/mermaid`, where
/// `<home>` is read from the `HOME` environment variable, then
/// `USERPROFILE`.
///
/// # Errors
///
/// Fails if the fallback is needed and neither variable can be read, or if
/// the directory cannot be created.
pub fn get_config_dir() -> Result<PathBuf> {
    let config_dir = match ProjectDirs::from("", "", "mermaid") {
        Some(proj_dirs) => proj_dirs.config_dir().to_path_buf(),
        None => {
            // Fallback to home directory
            let home = std::env::var("HOME")
                .or_else(|_| std::env::var("USERPROFILE"))
                .context("Could not determine home directory")?;
            PathBuf::from(home).join(".config").join("mermaid")
        }
    };

    std::fs::create_dir_all(&config_dir)?;
    Ok(config_dir)
}

/// Save configuration to file
pub fn save_config(config: &Config, path: Option<PathBuf>) -> Result<()> {
    let path = if let Some(p) = path {
        p
    } else {
        get_config_dir()?.join("config.toml")
    };

    let toml_string = toml::to_string_pretty(config)?;
    std::fs::write(&path, toml_string)
        .with_context(|| format!("Failed to write config to {}", path.display()))?;

    Ok(())
}

/// Create a default configuration file if it doesn't exist.
///
/// Prints the config path either way. An existing file is only detected,
/// never validated or overwritten — so this does not repair a config file
/// that exists but fails to parse.
///
/// # Errors
///
/// Fails if the config path cannot be determined or the default config
/// cannot be written.
pub fn init_config() -> Result<()> {
    let path = get_config_path()?;

    if path.exists() {
        println!("Configuration already exists at: {}", path.display());
        return Ok(());
    }

    save_config(&Config::default(), Some(path.clone()))?;
    println!("Created configuration at: {}", path.display());
    Ok(())
}

/// Persist the last used model to the config file.
///
/// Loads the current config, sets `last_used_model` to `model`, and writes
/// the result back to the default config path.
///
/// # Errors
///
/// Fails if the existing config cannot be loaded or the updated config
/// cannot be saved. A config file that exists but cannot be read or parsed
/// is deliberately left untouched: falling back to defaults here would
/// overwrite every other setting in the user's file (providers, MCP
/// servers, ...) just to record one value.
pub fn persist_last_model(model: &str) -> Result<()> {
    // `load_config` already yields defaults when no file exists, so an
    // error here always means "there is a file we could not load".
    let mut config = load_config()?;
    config.last_used_model = Some(model.to_string());
    save_config(&config, None)
}

/// Persist the user's default reasoning level to config file. Mirrors
/// `persist_last_model` — used by the `/reasoning` slash command and the
/// Alt+T cycle handler so the choice survives across sessions.
///
/// Loads the current config, sets `default_model.reasoning` to `level`,
/// and writes the result back to the default config path.
///
/// # Errors
///
/// Fails if the existing config cannot be loaded or the updated config
/// cannot be saved. A config file that exists but cannot be read or parsed
/// is deliberately left untouched: falling back to defaults here would
/// overwrite every other setting in the user's file just to record one
/// value.
pub fn persist_default_reasoning(level: ReasoningLevel) -> Result<()> {
    // `load_config` already yields defaults when no file exists, so an
    // error here always means "there is a file we could not load".
    let mut config = load_config()?;
    config.default_model.reasoning = level;
    save_config(&config, None)
}

/// Persist a reasoning level for a specific model ID
/// (e.g. `anthropic/claude-sonnet-4-6`). The TUI calls this from Alt+T,
/// `/reasoning <level>`, and the does-not-support-thinking auto-snap so
/// the choice sticks per-model rather than bleeding into other models on
/// next session start.
///
/// Loads the current config, inserts (or replaces) the entry for
/// `model_id` in `reasoning_per_model`, and writes the result back to the
/// default config path.
///
/// # Errors
///
/// Fails if the existing config cannot be loaded or the updated config
/// cannot be saved. A config file that exists but cannot be read or parsed
/// is deliberately left untouched: falling back to defaults here would
/// overwrite every other setting in the user's file just to record one
/// value.
pub fn persist_reasoning_for_model(model_id: &str, level: ReasoningLevel) -> Result<()> {
    // `load_config` already yields defaults when no file exists, so an
    // error here always means "there is a file we could not load".
    let mut config = load_config()?;
    config
        .reasoning_per_model
        .insert(model_id.to_string(), level);
    save_config(&config, None)
}

/// Resolve which model to use: CLI arg > last_used > default_model > any available
pub async fn resolve_model_id(cli_model: Option<&str>, config: &Config) -> anyhow::Result<String> {
    if let Some(model) = cli_model {
        return Ok(model.to_string());
    }
    if let Some(last_model) = &config.last_used_model {
        return Ok(last_model.clone());
    }
    if !config.default_model.provider.is_empty() && !config.default_model.name.is_empty() {
        return Ok(format!(
            "{}/{}",
            config.default_model.provider, config.default_model.name
        ));
    }
    let available = crate::ollama::require_any_model(config).await?;
    // `require_any_model` already errors on empty, so this `.first()` is
    // never `None` in practice. Use `.first()` over `[0]` so the precondition
    // is enforced by the type system instead of by a comment.
    let first = available
        .first()
        .ok_or_else(|| anyhow::anyhow!("require_any_model returned empty list"))?;
    Ok(format!("ollama/{}", first))
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Round-trips `cfg` through a real file: saves it with
    /// `save_config(_, Some(path))` to `<temp_dir>/<dir_name>/config.toml`,
    /// reads the file back and parses it.
    ///
    /// An explicit path is used on purpose — the `persist_*` helpers write
    /// to the real config location, which a test must not clobber. The
    /// scratch directory comes from `std::env::temp_dir`, so no external
    /// `tempfile` crate is needed. It is returned alongside the reloaded
    /// config so the caller can remove it once its assertions are done.
    fn save_then_reload(dir_name: &str, cfg: &Config) -> (Config, std::path::PathBuf) {
        let scratch = std::env::temp_dir().join(dir_name);
        std::fs::create_dir_all(&scratch).expect("create temp dir");
        let file = scratch.join("config.toml");

        save_config(cfg, Some(file.clone())).expect("save");

        let text = std::fs::read_to_string(&file).expect("read");
        let reloaded = toml::from_str::<Config>(&text).expect("parse back");
        (reloaded, scratch)
    }

    /// Configs written before `[default_model]` had a `reasoning` field
    /// must still load, with the field taking the `Medium` default —
    /// otherwise existing user configs break on upgrade.
    #[test]
    fn model_settings_deserializes_without_reasoning_field() {
        let legacy = r#"
            provider = "ollama"
            name = "qwen3-coder:30b"
            temperature = 0.7
            max_tokens = 4096
        "#;

        let parsed = toml::from_str::<ModelSettings>(legacy).expect("backward compat");

        assert_eq!(parsed.reasoning, ReasoningLevel::Medium);
        assert_eq!(parsed.provider, "ollama");
    }

    /// A non-default reasoning level survives serialize -> deserialize.
    #[test]
    fn model_settings_round_trips_reasoning_high() {
        let before = ModelSettings {
            provider: "anthropic".to_string(),
            name: "claude-sonnet-4-6".to_string(),
            temperature: 0.5,
            max_tokens: 8192,
            reasoning: ReasoningLevel::High,
        };

        let encoded = toml::to_string(&before).expect("serialize");
        let after = toml::from_str::<ModelSettings>(&encoded).expect("deserialize");

        assert_eq!(after.reasoning, ReasoningLevel::High);
        assert_eq!(after.name, "claude-sonnet-4-6");
    }

    /// `default_model.reasoning` survives a save to disk and a reload.
    #[test]
    fn save_and_reload_preserves_reasoning_field() {
        let cfg = Config {
            default_model: ModelSettings {
                provider: "ollama".to_string(),
                name: "qwen3-coder:30b".to_string(),
                reasoning: ReasoningLevel::Low,
                ..ModelSettings::default()
            },
            ..Config::default()
        };

        let (loaded, scratch) = save_then_reload("mermaid_test_config_reasoning", &cfg);

        assert_eq!(loaded.default_model.reasoning, ReasoningLevel::Low);

        let _ = std::fs::remove_dir_all(&scratch);
    }

    /// Per-model entries serialize as a TOML table with quoted keys (the
    /// model IDs contain `/`). Checks that such a table round-trips through
    /// serialization and deserialization, matching what
    /// `persist_reasoning_for_model` would produce in real use.
    #[test]
    fn save_and_reload_preserves_reasoning_per_model_table() {
        let cfg = Config {
            reasoning_per_model: HashMap::from([
                (
                    "anthropic/claude-sonnet-4-6".to_string(),
                    ReasoningLevel::High,
                ),
                ("ollama/qwen3-coder:30b".to_string(), ReasoningLevel::Low),
            ]),
            ..Config::default()
        };

        let (loaded, scratch) =
            save_then_reload("mermaid_test_config_per_model_reasoning", &cfg);
        let per_model = &loaded.reasoning_per_model;

        assert_eq!(
            per_model.get("anthropic/claude-sonnet-4-6"),
            Some(&ReasoningLevel::High)
        );
        assert_eq!(
            per_model.get("ollama/qwen3-coder:30b"),
            Some(&ReasoningLevel::Low)
        );

        let _ = std::fs::remove_dir_all(&scratch);
    }

    /// Configs written before the `reasoning_per_model` section existed
    /// must still load, yielding an empty map — otherwise upgrade breaks
    /// every existing user.
    #[test]
    fn config_deserializes_without_reasoning_per_model() {
        let legacy = r#"
            last_used_model = "ollama/qwen3-coder:30b"

            [default_model]
            provider = "ollama"
            name = "qwen3-coder:30b"
            temperature = 0.7
            max_tokens = 4096
        "#;

        let parsed = toml::from_str::<Config>(legacy).expect("backward compat");

        assert!(parsed.reasoning_per_model.is_empty());
    }
}