koda-core 0.2.24

Core engine for the Koda AI coding agent (macOS and Linux only)
//! LLM provider abstraction layer.
//!
//! Defines a common `Provider` trait for all backends and re-exports
//! the concrete implementations.
//!
//! ## Supported providers
//!
//! | Provider | Module | API style | Local? |
//! |---|---|---|---|
//! | Anthropic Claude | `anthropic` | Native | No |
//! | Google Gemini | `gemini` | Native | No |
//! | OpenAI / GPT | `openai_compat` | OpenAI-compat | No |
//! | LM Studio | `openai_compat` | OpenAI-compat | Yes |
//! | Ollama | `openai_compat` | OpenAI-compat | Yes |
//! | Groq | `openai_compat` | OpenAI-compat | No |
//! | Grok (xAI) | `openai_compat` | OpenAI-compat | No |
//! | DeepSeek | `openai_compat` | OpenAI-compat | No |
//! | OpenRouter | `openai_compat` | OpenAI-compat | No |
//! | Together | `openai_compat` | OpenAI-compat | No |
//! | Mistral | `openai_compat` | OpenAI-compat | No |
//! | Cerebras | `openai_compat` | OpenAI-compat | No |
//! | Fireworks | `openai_compat` | OpenAI-compat | No |
//! | Custom | `openai_compat` | OpenAI-compat | Varies |
//!
//! All OpenAI-compatible providers share the same module with different
//! base URLs. Use `--base-url` to point at any compatible endpoint.
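//!
//! For example, pointing the shared provider at a local Ollama server
//! (a minimal sketch; the endpoint URL is illustrative, and the
//! constructor signature matches the factory at the bottom of this module):
//!
//! ```no_run
//! use koda_core::providers::openai_compat::OpenAiCompatProvider;
//!
//! // No API key is needed for a local server.
//! let provider = OpenAiCompatProvider::new("http://localhost:11434/v1", None);
//! ```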
//!
//! ## Design (DESIGN.md)
//!
//! - **Any model, any provider (P1)**: No vendor lock-in.
//!   The tool serves the person, not the platform.
//! - **Context Window Auto-Detection (P1, P3)**: Capabilities are queried
//!   from the provider API at startup. Hardcoded lookup is the fallback,
//!   not the primary source.

/// Anthropic Claude API provider.
pub mod anthropic;
/// Google Gemini API provider.
pub mod gemini;
/// OpenAI-compatible provider (LM Studio, Ollama, vLLM, OpenRouter, etc.).
pub mod openai_compat;
/// Shared SSE stream collector for all providers.
pub mod stream_collector;
/// Streaming XML tag filter for think/reasoning tags.
pub mod stream_tag_filter;

/// Mock provider for deterministic testing.
#[cfg(any(test, feature = "test-support"))]
pub mod mock;

use anyhow::Result;
use async_trait::async_trait;
use serde::{Deserialize, Serialize};

/// A tool call requested by the LLM.
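///
/// # Examples
///
/// Arguments arrive as a raw JSON string; callers decode them with
/// `serde_json` (the `"path"` argument below is illustrative, not a real
/// tool schema):
///
/// ```
/// use koda_core::providers::ToolCall;
///
/// let call = ToolCall {
///     id: "call_1".into(),
///     function_name: "Read".into(),
///     arguments: r#"{"path":"src/main.rs"}"#.into(),
///     thought_signature: None,
/// };
/// let args: serde_json::Value = serde_json::from_str(&call.arguments).unwrap();
/// assert_eq!(args["path"], "src/main.rs");
/// ```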
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCall {
    /// Provider-assigned call ID (echoed back in tool results).
    pub id: String,
    /// Name of the tool to invoke.
    pub function_name: String,
    /// Raw JSON string of tool arguments.
    pub arguments: String,
    /// Gemini-specific: thought signature that must be echoed back in history.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub thought_signature: Option<String>,
}

/// Token usage from an LLM response.
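///
/// # Examples
///
/// A caller-side total (illustrative; how cache and thinking tokens are
/// billed varies by provider):
///
/// ```
/// use koda_core::providers::TokenUsage;
///
/// let usage = TokenUsage {
///     prompt_tokens: 1200,
///     completion_tokens: 300,
///     ..Default::default()
/// };
/// assert_eq!(usage.prompt_tokens + usage.completion_tokens, 1500);
/// ```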
#[derive(Debug, Clone, Default)]
pub struct TokenUsage {
    /// Input tokens sent to the model.
    pub prompt_tokens: i64,
    /// Output tokens generated by the model.
    pub completion_tokens: i64,
    /// Tokens read from provider cache (e.g. Anthropic prompt caching, Gemini cached content).
    pub cache_read_tokens: i64,
    /// Tokens written to provider cache on this request.
    pub cache_creation_tokens: i64,
    /// Tokens used for reasoning/thinking (e.g. OpenAI reasoning_tokens, Anthropic thinking).
    pub thinking_tokens: i64,
    /// Why the model stopped: "end_turn", "max_tokens", "stop_sequence", etc.
    /// Empty string means unknown (provider didn't report it).
    pub stop_reason: String,
}

/// The LLM's response: either text, tool calls, or both.
#[derive(Debug, Clone)]
pub struct LlmResponse {
    /// Text content of the response (may be `None` if tool-calls only).
    pub content: Option<String>,
    /// Tool calls requested by the model.
    pub tool_calls: Vec<ToolCall>,
    /// Token usage statistics.
    pub usage: TokenUsage,
}

/// Base64-encoded image data for multi-modal messages.
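///
/// # Examples
///
/// Attaching an image to a user message (the base64 payload below is a
/// truncated placeholder, not a valid image):
///
/// ```
/// use koda_core::providers::{ChatMessage, ImageData};
///
/// let mut msg = ChatMessage::text("user", "What does this screenshot show?");
/// msg.images = Some(vec![ImageData {
///     media_type: "image/png".to_string(),
///     base64: "iVBORw0KGgo=".to_string(),
/// }]);
/// ```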
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImageData {
    /// MIME type (e.g. "image/png", "image/jpeg").
    pub media_type: String,
    /// Base64-encoded image bytes.
    pub base64: String,
}

/// A single message in the conversation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatMessage {
    /// Message role: `"user"`, `"assistant"`, or `"tool"`.
    pub role: String,
    /// Text content (may be `None` for tool-call-only messages).
    pub content: Option<String>,
    /// Tool calls requested by the assistant.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_calls: Option<Vec<ToolCall>>,
    /// ID of the tool call this message responds to.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_call_id: Option<String>,
    /// Attached images (only used in-flight, not persisted to DB).
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub images: Option<Vec<ImageData>>,
}

impl ChatMessage {
    /// Create a simple text message (convenience for the common case).
    ///
    /// # Examples
    ///
    /// ```
    /// use koda_core::providers::ChatMessage;
    ///
    /// let msg = ChatMessage::text("user", "Hello!");
    /// assert_eq!(msg.role, "user");
    /// assert_eq!(msg.content.as_deref(), Some("Hello!"));
    /// assert!(msg.tool_calls.is_none());
    /// ```
    pub fn text(role: &str, content: &str) -> Self {
        Self {
            role: role.to_string(),
            content: Some(content.to_string()),
            tool_calls: None,
            tool_call_id: None,
            images: None,
        }
    }
}

/// Tool definition sent to the LLM.
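///
/// # Examples
///
/// A minimal definition (sketch; the schema is illustrative, not the
/// crate's actual `Read` tool schema):
///
/// ```
/// use koda_core::providers::ToolDefinition;
///
/// let def = ToolDefinition {
///     name: "Read".to_string(),
///     description: "Read a file from disk.".to_string(),
///     parameters: serde_json::json!({
///         "type": "object",
///         "properties": { "path": { "type": "string" } },
///         "required": ["path"]
///     }),
/// };
/// assert_eq!(def.parameters["required"][0], "path");
/// ```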
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolDefinition {
    /// Tool name (e.g. `"Read"`, `"Bash"`).
    pub name: String,
    /// Human-readable description for the LLM.
    pub description: String,
    /// JSON Schema for the tool's parameters.
    pub parameters: serde_json::Value,
}

/// A discovered model from a provider.
#[derive(Debug, Clone)]
pub struct ModelInfo {
    /// Model identifier (e.g. `"claude-3-5-sonnet-20241022"`).
    pub id: String,
    /// Provider/organization that owns the model.
    #[allow(dead_code)]
    pub owned_by: Option<String>,
}

/// Model capabilities queried from the provider API.
#[derive(Debug, Clone, Default)]
pub struct ModelCapabilities {
    /// Maximum context window in tokens (input + output).
    pub context_window: Option<usize>,
    /// Maximum output tokens the model supports.
    pub max_output_tokens: Option<usize>,
}

/// Returns `true` if the URL points at a loopback host
/// (`localhost`, `127.0.0.1`, or `[::1]`).
fn is_localhost_url(url: &str) -> bool {
    let lower = url.to_lowercase();
    lower.contains("://localhost") || lower.contains("://127.0.0.1") || lower.contains("://[::1]")
}

/// Build a reqwest client with proper proxy configuration.
///
/// - Reads HTTPS_PROXY / HTTP_PROXY from env
/// - Supports proxy auth via URL (http://user:pass@proxy:port)
/// - Supports separate PROXY_USER / PROXY_PASS env vars
/// - Bypasses proxy for localhost (LM Studio)
/// - Applies a connect timeout (default 30s, env: `KODA_CONNECT_TIMEOUT_SECS`)
///   and a read timeout (default 300s = 5 min, env: `KODA_READ_TIMEOUT_SECS`).
///   We deliberately avoid the total-request `.timeout()` because it would
///   kill long-running SSE streams during slow tool/agent turns.
///
///   The 5-minute default matches `codex-rs/model-provider-info`'s
///   `DEFAULT_STREAM_IDLE_TIMEOUT_MS` and accommodates reasoning-heavy
///   models (Gemini 3.x Pro, MiniMax 2.x, etc.) that may think silently
///   for several minutes between SSE chunks. See issue #1119.
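///
/// # Examples
///
/// ```no_run
/// use koda_core::providers::build_http_client;
///
/// // Local provider: localhost is excluded from any configured proxy,
/// // and KODA_ACCEPT_INVALID_CERTS (if set) only takes effect here.
/// let client = build_http_client(Some("http://localhost:1234/v1"));
/// ```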
pub fn build_http_client(base_url: Option<&str>) -> reqwest::Client {
    let mut builder = reqwest::Client::builder();

    // ── Timeouts ──────────────────────────────────────────────────────────
    //
    // connect_timeout: time to establish the TCP+TLS connection. A stuck
    // SYN or hung TLS handshake aborts after this. Always safe to apply
    // because it only governs connection setup, not response reading.
    //
    // read_timeout: maximum idle time between successive reads from the
    // socket. An actively-streaming SSE response keeps resetting this on
    // every chunk, so it doesn't penalize long agent turns; but a server
    // that goes silent (or a half-open connection a NAT box has dropped)
    // will fail fast instead of hanging the agent forever.
    let connect_timeout = crate::runtime_env::get("KODA_CONNECT_TIMEOUT_SECS")
        .and_then(|v| v.parse::<u64>().ok())
        .unwrap_or(30);
    let read_timeout = crate::runtime_env::get("KODA_READ_TIMEOUT_SECS")
        .and_then(|v| v.parse::<u64>().ok())
        .unwrap_or(300);
    builder = builder
        .connect_timeout(std::time::Duration::from_secs(connect_timeout))
        .read_timeout(std::time::Duration::from_secs(read_timeout));

    let proxy_url = crate::runtime_env::get("HTTPS_PROXY")
        .or_else(|| crate::runtime_env::get("HTTP_PROXY"))
        .or_else(|| crate::runtime_env::get("https_proxy"))
        .or_else(|| crate::runtime_env::get("http_proxy"));

    if let Some(ref url) = proxy_url
        && !url.is_empty()
    {
        match reqwest::Proxy::all(url) {
            Ok(mut proxy) => {
                // Bypass proxy for local addresses
                proxy = proxy.no_proxy(reqwest::NoProxy::from_string("localhost,127.0.0.1,::1"));

                // If URL doesn't contain creds, check env vars
                if !url.contains('@') {
                    let user = crate::runtime_env::get("PROXY_USER");
                    let pass = crate::runtime_env::get("PROXY_PASS");
                    if let (Some(u), Some(p)) = (user, pass) {
                        proxy = proxy.basic_auth(&u, &p);
                        tracing::debug!("Using proxy with basic auth (credentials redacted)");
                    }
                }

                builder = builder.proxy(proxy);
                tracing::debug!("Using proxy: {}", redact_url_credentials(url));
            }
            Err(e) => {
                tracing::warn!("Invalid proxy URL '{}': {e}", redact_url_credentials(url));
            }
        }
    }

    // Accept self-signed certs only for localhost (LM Studio, Ollama, vLLM).
    // The env var is still required, but it's now scoped to local addresses.
    let wants_skip_tls = crate::runtime_env::get("KODA_ACCEPT_INVALID_CERTS")
        .map(|v| v == "1" || v == "true")
        .unwrap_or(false);
    let is_local = base_url.is_some_and(is_localhost_url);
    if wants_skip_tls && is_local {
        tracing::info!("TLS certificate validation disabled for local provider.");
        builder = builder.danger_accept_invalid_certs(true);
    } else if wants_skip_tls {
        tracing::warn!(
            "KODA_ACCEPT_INVALID_CERTS is set but provider URL is not localhost — ignoring. \
             TLS bypass is only allowed for local providers (localhost/127.0.0.1)."
        );
    }

    builder.build().unwrap_or_else(|e| {
        tracing::warn!("Failed to build HTTP client ({e}); falling back to a default client");
        reqwest::Client::new()
    })
}

/// Redact embedded credentials from a URL.
///
/// `http://user:pass@proxy:8080` → `http://***:***@proxy:8080`
fn redact_url_credentials(url: &str) -> String {
    // Pattern: scheme://user:pass@host...
    if let Some(at_pos) = url.find('@')
        && let Some(scheme_end) = url.find("://")
    {
        let prefix = &url[..scheme_end + 3]; // "http://"
        let host_part = &url[at_pos..]; // "@proxy:8080/..."
        return format!("{prefix}***:***{host_part}");
    }
    url.to_string()
}

/// A streaming chunk from the LLM.
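///
/// # Examples
///
/// A consumer typically matches on each chunk as it arrives (sketch; the
/// delivery loop via [`stream_collector::SseCollector`] is elided):
///
/// ```
/// use koda_core::providers::StreamChunk;
///
/// fn handle(chunk: StreamChunk) {
///     match chunk {
///         StreamChunk::TextDelta(t) => print!("{t}"),
///         StreamChunk::ThinkingDelta(_) => { /* optionally render reasoning */ }
///         StreamChunk::ToolCallReady(call) => println!("eager: {}", call.function_name),
///         StreamChunk::ToolCalls(calls) => println!("{} tool call(s)", calls.len()),
///         StreamChunk::Done(usage) => println!("done ({} out)", usage.completion_tokens),
///         StreamChunk::NetworkError(e) => eprintln!("discarding partial turn: {e}"),
///     }
/// }
/// # handle(StreamChunk::TextDelta(String::new()));
/// ```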
#[derive(Debug, Clone)]
pub enum StreamChunk {
    /// A text delta (partial content).
    TextDelta(String),
    /// A thinking/reasoning delta from native API (Anthropic extended thinking, OpenAI reasoning).
    ThinkingDelta(String),
    /// A single tool call whose arguments finished streaming.
    ///
    /// Emitted by providers that support per-block completion events (Anthropic
    /// `content_block_stop`). Enables eager execution of read-only tools while
    /// subsequent tool calls are still being streamed.
    ///
    /// Providers that don't support per-block events (OpenAI, Gemini) never
    /// emit this — they only emit `ToolCalls` at stream end.
    ToolCallReady(ToolCall),
    /// All tool calls from the response (batch, emitted at stream end).
    ///
    /// For Anthropic, this only contains tool calls NOT already emitted via
    /// `ToolCallReady`. For other providers, this contains all tool calls.
    ToolCalls(Vec<ToolCall>),
    /// Stream finished with usage info.
    Done(TokenUsage),
    /// The underlying HTTP connection was dropped before the stream completed.
    ///
    /// Distinct from `Done` (clean finish) and user-initiated cancellation
    /// (Ctrl+C). The partial response MUST be discarded — it is incomplete
    /// and storing it would corrupt the session history on resume.
    NetworkError(String),
}

/// Trait for LLM provider backends.
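///
/// A non-streaming call, sketched (construction of the provider, messages,
/// tools, and [`crate::config::ModelSettings`] is elided):
///
/// ```ignore
/// let resp = provider.chat(&messages, &tools, &settings).await?;
/// if let Some(text) = resp.content {
///     println!("{text}");
/// }
/// ```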
#[async_trait]
pub trait LlmProvider: Send + Sync {
    /// Send a chat completion request (non-streaming).
    async fn chat(
        &self,
        messages: &[ChatMessage],
        tools: &[ToolDefinition],
        settings: &crate::config::ModelSettings,
    ) -> Result<LlmResponse>;

    /// Send a streaming chat completion request.
    /// Returns a [`stream_collector::SseCollector`] with the chunk receiver and a task handle
    /// that can be aborted to immediately kill the HTTP read (#825).
    async fn chat_stream(
        &self,
        messages: &[ChatMessage],
        tools: &[ToolDefinition],
        settings: &crate::config::ModelSettings,
    ) -> Result<stream_collector::SseCollector>;

    /// List available models from the provider.
    async fn list_models(&self) -> Result<Vec<ModelInfo>>;

    /// Query model capabilities (context window, max output tokens) from the API.
    ///
    /// Returns `Ok(caps)` with whatever the provider reports. Fields are `None`
    /// when the API doesn't expose them. Callers should fall back to the
    /// hardcoded lookup table for any `None` fields.
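    ///
    /// A caller-side fallback sketch (`lookup_table` is a hypothetical
    /// helper, not part of this crate):
    ///
    /// ```ignore
    /// let caps = provider.model_capabilities(model).await?;
    /// let window = caps.context_window.unwrap_or_else(|| lookup_table(model));
    /// ```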
    async fn model_capabilities(&self, _model: &str) -> Result<ModelCapabilities> {
        Ok(ModelCapabilities::default())
    }

    /// Provider display name (for UI).
    fn provider_name(&self) -> &str;
}

// ── Provider factory ──────────────────────────────────────────

use crate::config::{KodaConfig, ProviderType};

/// Create an LLM provider from the given configuration.
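///
/// # Examples
///
/// ```ignore
/// // Sketch: building a KodaConfig is elided; see `crate::config`.
/// let provider = create_provider(&config);
/// println!("provider: {}", provider.provider_name());
/// ```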
pub fn create_provider(config: &KodaConfig) -> Box<dyn LlmProvider + Send + Sync> {
    let api_key = crate::runtime_env::get(config.provider_type.env_key_name());
    match config.provider_type {
        ProviderType::Anthropic => {
            let key = api_key.unwrap_or_else(|| {
                tracing::warn!("No ANTHROPIC_API_KEY set");
                String::new()
            });
            Box::new(anthropic::AnthropicProvider::new(
                key,
                Some(&config.base_url),
            ))
        }
        ProviderType::Gemini => {
            let key = api_key.unwrap_or_else(|| {
                tracing::warn!("No GEMINI_API_KEY set");
                String::new()
            });
            Box::new(gemini::GeminiProvider::new(key, Some(&config.base_url)))
        }
        #[cfg(any(test, feature = "test-support"))]
        ProviderType::Mock => Box::new(mock::MockProvider::from_env()),
        _ => Box::new(openai_compat::OpenAiCompatProvider::new(
            &config.base_url,
            api_key,
        )),
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    // ── is_localhost_url ────────────────────────────────────────────────

    #[test]
    fn test_is_localhost_url_localhost() {
        assert!(is_localhost_url("http://localhost:1234/v1"));
        assert!(is_localhost_url("HTTP://LOCALHOST:11434/api"));
    }

    #[test]
    fn test_is_localhost_url_127() {
        assert!(is_localhost_url("http://127.0.0.1:8000/v1"));
    }

    #[test]
    fn test_is_localhost_url_ipv6() {
        assert!(is_localhost_url("http://[::1]:1234/v1"));
    }

    #[test]
    fn test_is_localhost_url_remote() {
        assert!(!is_localhost_url("https://api.openai.com/v1"));
        assert!(!is_localhost_url("https://api.anthropic.com/v1"));
    }

    // ── redact_url_credentials ─────────────────────────────────────────

    #[test]
    fn test_redact_with_credentials() {
        let result = redact_url_credentials("http://user:secret@proxy.corp.com:8080");
        assert!(
            !result.contains("secret"),
            "credentials should be redacted: {result}"
        );
        assert!(
            result.contains("***:***"),
            "should have redacted placeholder: {result}"
        );
        assert!(
            result.contains("proxy.corp.com"),
            "host should be preserved: {result}"
        );
    }

    #[test]
    fn test_redact_without_credentials() {
        let url = "https://proxy.corp.com:8080";
        assert_eq!(redact_url_credentials(url), url);
    }

    #[test]
    fn test_redact_empty_url() {
        assert_eq!(redact_url_credentials(""), "");
    }

    // ── ChatMessage::text ─────────────────────────────────────────────

    #[test]
    fn test_chat_message_text_builder() {
        let msg = ChatMessage::text("user", "hello world");
        assert_eq!(msg.role, "user");
        assert_eq!(msg.content.as_deref(), Some("hello world"));
        assert!(msg.tool_calls.is_none());
        assert!(msg.tool_call_id.is_none());
        assert!(msg.images.is_none());
    }

    #[test]
    fn test_chat_message_text_assistant() {
        let msg = ChatMessage::text("assistant", "I can help with that.");
        assert_eq!(msg.role, "assistant");
        assert_eq!(msg.content.as_deref(), Some("I can help with that."));
    }

    // ── TokenUsage defaults ────────────────────────────────────────────

    #[test]
    fn test_token_usage_default() {
        let usage = TokenUsage::default();
        assert_eq!(usage.prompt_tokens, 0);
        assert_eq!(usage.completion_tokens, 0);
        assert!(
            usage.stop_reason.is_empty(),
            "default stop_reason should be empty"
        );
    }
}