use std::future::Future;
use std::pin::Pin;
use anyhow::Result;
use reqwest::Client;
use serde::{Deserialize, Serialize};
use tracing::{debug, info};
use super::{AiClient, AiClientMetadata};
use crate::claude::{error::ClaudeError, model_config::get_model_registry};
/// One chat message in the OpenAI chat-completions wire format.
#[derive(Serialize, Debug)]
struct Message {
// Author role; `send_request` emits "system" and "user".
role: String,
// Message text.
content: String,
}
/// Request body for `POST /v1/chat/completions`.
///
/// Exactly one of `max_tokens` / `max_completion_tokens` is set per request:
/// GPT-5/o1-series models require the latter, everything else uses the former
/// (see `send_request`). `None` fields are omitted from the JSON entirely.
#[derive(Serialize, Debug)]
struct OpenAiRequest {
model: String,
messages: Vec<Message>,
// Classic output-token cap for non-GPT-5/o1 models.
#[serde(skip_serializing_if = "Option::is_none")]
max_tokens: Option<i32>,
// Replacement cap used by GPT-5/o1-series models.
#[serde(skip_serializing_if = "Option::is_none")]
max_completion_tokens: Option<i32>,
// Omitted for GPT-5/o1-series models, which reject it.
#[serde(skip_serializing_if = "Option::is_none")]
temperature: Option<f32>,
// Always false here; this client does not consume streamed responses.
stream: bool,
}
/// One completion choice from the response; only the first is consumed.
#[derive(Deserialize, Debug)]
struct Choice {
message: ResponseMessage,
// Deserialized for completeness but not inspected.
#[allow(dead_code)] finish_reason: Option<String>,
}
/// Assistant message payload inside a `Choice`.
#[derive(Deserialize, Debug)]
struct ResponseMessage {
// Present on the wire but unused; only `content` is returned to callers.
#[allow(dead_code)] role: String,
content: String,
}
/// Top-level chat-completions response envelope.
#[derive(Deserialize, Debug)]
struct OpenAiResponse {
choices: Vec<Choice>,
// Echoed model name; logged at debug level only.
model: Option<String>,
// Token accounting; logged at debug level only.
usage: Option<Usage>,
}
/// Token usage reported by the server; captured for debug logging, not
/// otherwise read (hence the dead_code allowance).
#[derive(Deserialize, Debug)]
#[allow(dead_code)] struct Usage {
prompt_tokens: Option<i32>,
completion_tokens: Option<i32>,
total_tokens: Option<i32>,
}
/// AI client for any OpenAI-compatible chat-completions endpoint
/// (hosted OpenAI as well as local Ollama servers).
pub struct OpenAiAiClient {
// HTTP client from `super::build_http_client`.
client: Client,
// Bearer token; `None` for unauthenticated servers such as local Ollama.
api_key: Option<String>,
model: String,
// Base URL without the `/v1/chat/completions` suffix.
base_url: String,
// Explicit output-token cap; when `None` the model registry decides.
max_tokens: Option<i32>,
// Sampling temperature; dropped for GPT-5/o1-series requests.
temperature: Option<f32>,
// Optional (header, value) beta pair, forwarded to registry lookups
// and surfaced in metadata.
active_beta: Option<(String, String)>,
}
impl OpenAiAiClient {
    /// Creates a client for an arbitrary OpenAI-compatible endpoint.
    ///
    /// # Errors
    /// Fails only if the shared HTTP client cannot be constructed.
    pub fn new(
        model: String,
        api_key: Option<String>,
        base_url: String,
        max_tokens: Option<i32>,
        temperature: Option<f32>,
        active_beta: Option<(String, String)>,
    ) -> Result<Self> {
        let client = super::build_http_client()?;
        Ok(Self {
            client,
            api_key,
            model,
            base_url,
            max_tokens,
            temperature,
            active_beta,
        })
    }

    /// Convenience constructor for a local Ollama server.
    ///
    /// Defaults to `http://localhost:11434` when no base URL is given and
    /// applies conservative generation defaults (4096 tokens, temperature 0.1).
    pub fn new_ollama(
        model: String,
        base_url: Option<String>,
        active_beta: Option<(String, String)>,
    ) -> Result<Self> {
        Self::new(
            model,
            None,
            base_url.unwrap_or_else(|| "http://localhost:11434".to_string()),
            Some(4096),
            Some(0.1),
            active_beta,
        )
    }

    /// Convenience constructor for the hosted OpenAI API.
    ///
    /// Leaves `max_tokens` unset so the model registry supplies the limit.
    pub fn new_openai(
        model: String,
        api_key: String,
        active_beta: Option<(String, String)>,
    ) -> Result<Self> {
        Self::new(
            model,
            Some(api_key),
            "https://api.openai.com".to_string(),
            None,
            Some(0.1),
            active_beta,
        )
    }

    /// Effective output-token cap: an explicitly configured value wins,
    /// otherwise the (beta-aware) model registry decides.
    fn get_max_tokens(&self) -> i32 {
        if let Some(configured_max) = self.max_tokens {
            return configured_max;
        }
        super::registry_max_output_tokens(&self.model, &self.active_beta)
    }

    /// Builds the chat-completions endpoint URL from the configured base URL.
    ///
    /// All trailing slashes are stripped before appending the path, so
    /// "http://host/", "http://host//" and "http://host" all yield the same
    /// endpoint. (Previously only a single trailing slash was removed, which
    /// produced a `//v1/...` path for doubled slashes.)
    fn get_api_url(&self) -> Result<String> {
        let base = self.base_url.trim_end_matches('/');
        let url = format!("{base}/v1/chat/completions");
        debug!(base_url = %self.base_url, full_url = %url, "Constructed OpenAI-compatible API URL");
        Ok(url)
    }

    /// Heuristic Ollama detection: a loopback host or the absence of an API
    /// key is treated as a local/unauthenticated server.
    fn is_ollama(&self) -> bool {
        self.base_url.contains("localhost")
            || self.base_url.contains("127.0.0.1")
            || self.api_key.is_none()
    }

    /// Models (GPT-5 family and o1 reasoning models) that require
    /// `max_completion_tokens` and reject the `temperature` parameter.
    fn is_gpt5_series(&self) -> bool {
        self.model.starts_with("gpt-5") || self.model.starts_with("o1")
    }
}
impl AiClient for OpenAiAiClient {
    /// Sends a single (non-streaming) chat-completion request and returns the
    /// content of the first choice.
    ///
    /// An empty `system_prompt` is omitted from the message list rather than
    /// sent as an empty system message. GPT-5/o1-series models get the
    /// `max_completion_tokens` field and no `temperature`; all other models
    /// use the classic `max_tokens` + `temperature` pair.
    ///
    /// # Errors
    /// - `ClaudeError::NetworkError` if the HTTP request fails to send.
    /// - Whatever `super::check_error_response` maps non-success statuses to.
    /// - `ClaudeError::InvalidResponseFormat` if the body is not valid JSON
    ///   or contains no choices.
    fn send_request<'a>(
        &'a self,
        system_prompt: &'a str,
        user_prompt: &'a str,
    ) -> Pin<Box<dyn Future<Output = Result<String>> + Send + 'a>> {
        Box::pin(async move {
            debug!(
                system_prompt_len = system_prompt.len(),
                user_prompt_len = user_prompt.len(),
                model = %self.model,
                base_url = %self.base_url,
                is_ollama = self.is_ollama(),
                "Preparing OpenAI-compatible API request"
            );
            let mut messages = Vec::new();
            if !system_prompt.is_empty() {
                messages.push(Message {
                    role: "system".to_string(),
                    content: system_prompt.to_string(),
                });
            }
            messages.push(Message {
                role: "user".to_string(),
                content: user_prompt.to_string(),
            });
            let max_tokens = self.get_max_tokens();
            // GPT-5/o1-series models reject `max_tokens` and `temperature`.
            let request = if self.is_gpt5_series() {
                OpenAiRequest {
                    model: self.model.clone(),
                    messages,
                    max_tokens: None,
                    max_completion_tokens: Some(max_tokens),
                    temperature: None,
                    stream: false,
                }
            } else {
                OpenAiRequest {
                    model: self.model.clone(),
                    messages,
                    max_tokens: Some(max_tokens),
                    max_completion_tokens: None,
                    temperature: self.temperature,
                    stream: false,
                }
            };
            debug!(
                max_tokens = max_tokens,
                configured_temperature = ?self.temperature,
                effective_temperature = ?request.temperature,
                message_count = request.messages.len(),
                is_gpt5_series = self.is_gpt5_series(),
                uses_max_completion_tokens = self.is_gpt5_series(),
                "Built OpenAI-compatible request payload"
            );
            let api_url = self.get_api_url()?;
            info!(url = %api_url, model = %self.model, "Sending request to OpenAI-compatible API");
            let mut req_builder = self
                .client
                .post(&api_url)
                .header("Content-Type", "application/json")
                .json(&request);
            // Local Ollama servers need no auth header; only attach a bearer
            // token when a key is configured.
            if let Some(ref api_key) = self.api_key {
                req_builder = req_builder.header("Authorization", format!("Bearer {api_key}"));
            }
            let response = req_builder
                .send()
                .await
                .map_err(|e| ClaudeError::NetworkError(e.to_string()))?;
            let response = super::check_error_response(response).await?;
            let openai_response: OpenAiResponse = response
                .json()
                .await
                .map_err(|e| ClaudeError::InvalidResponseFormat(e.to_string()))?;
            debug!(
                choice_count = openai_response.choices.len(),
                model = ?openai_response.model,
                usage = ?openai_response.usage,
                "Received OpenAI-compatible API response"
            );
            // Only the first choice is consumed; an empty `choices` array is a
            // protocol error.
            let result = openai_response
                .choices
                .first()
                .map(|choice| choice.message.content.clone())
                .ok_or_else(|| {
                    ClaudeError::InvalidResponseFormat("No choices in response".to_string()).into()
                });
            super::log_response_success("OpenAI-compatible", &result);
            result
        })
    }

    /// Reports provider, model, and context/response limits.
    ///
    /// Registry lookups returning 0 (unknown model) fall back to conservative
    /// defaults (32768 input / 4096 output tokens). Each registry value is now
    /// queried once instead of twice (once in the condition, once in the
    /// branch).
    ///
    /// NOTE(review): this uses `registry.get_max_output_tokens` directly,
    /// unlike `get_max_tokens`, which goes through the beta-aware
    /// `super::registry_max_output_tokens` — confirm whether metadata should
    /// also honor `active_beta`.
    fn get_metadata(&self) -> AiClientMetadata {
        let registry = get_model_registry();
        let input_context = registry.get_input_context(&self.model);
        let max_context_length = if input_context > 0 {
            input_context
        } else {
            32768
        };
        let output_tokens = registry.get_max_output_tokens(&self.model);
        let max_response_length = if output_tokens > 0 {
            output_tokens
        } else {
            4096
        };
        let provider = if self.is_ollama() {
            "Ollama".to_string()
        } else {
            "OpenAI".to_string()
        };
        AiClientMetadata {
            provider,
            model: self.model.clone(),
            max_context_length,
            max_response_length,
            active_beta: self.active_beta.clone(),
        }
    }
}
// Unit tests for the OpenAI-compatible client. These cover construction
// defaults, URL building, Ollama detection, GPT-5/o1-series routing, token
// limits, metadata, and request serialization — no network access is made.
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
use super::*;
// Default Ollama construction: localhost URL, no API key.
#[test]
fn new_ollama() {
let client = OpenAiAiClient::new_ollama("llama2".to_string(), None, None).unwrap();
assert_eq!(client.model, "llama2");
assert_eq!(client.base_url, "http://localhost:11434");
assert!(client.api_key.is_none());
assert!(client.is_ollama());
}
// An explicit base URL overrides the localhost default; still detected as
// Ollama because no API key is set.
#[test]
fn new_ollama_custom_url() {
let client = OpenAiAiClient::new_ollama(
"codellama".to_string(),
Some("http://192.168.1.100:11434".to_string()),
None,
)
.unwrap();
assert_eq!(client.base_url, "http://192.168.1.100:11434");
assert!(client.is_ollama());
}
// Hosted OpenAI construction pins the api.openai.com base URL.
#[test]
fn new_openai() {
let client =
OpenAiAiClient::new_openai("gpt-4".to_string(), "sk-test123".to_string(), None)
.unwrap();
assert_eq!(client.model, "gpt-4");
assert_eq!(client.base_url, "https://api.openai.com");
assert_eq!(client.api_key, Some("sk-test123".to_string()));
assert!(!client.is_ollama());
}
// The chat-completions path is appended to the base URL.
#[test]
fn get_api_url() {
let client = OpenAiAiClient::new_ollama("llama2".to_string(), None, None).unwrap();
let url = client.get_api_url().unwrap();
assert_eq!(url, "http://localhost:11434/v1/chat/completions");
}
// A trailing slash on the base URL must not produce a double slash.
#[test]
fn get_api_url_trailing_slash() {
let client = OpenAiAiClient::new(
"test-model".to_string(),
None,
"http://localhost:11434/".to_string(),
None,
None,
None,
)
.unwrap();
let url = client.get_api_url().unwrap();
assert_eq!(url, "http://localhost:11434/v1/chat/completions");
}
// is_ollama fires on "localhost", "127.0.0.1", or a missing API key — and
// only a remote URL WITH a key counts as hosted OpenAI.
#[test]
fn is_ollama_detection() {
let ollama_client = OpenAiAiClient::new(
"llama2".to_string(),
None,
"http://localhost:11434".to_string(),
None,
None,
None,
)
.unwrap();
assert!(ollama_client.is_ollama());
let local_client = OpenAiAiClient::new(
"llama2".to_string(),
Some("fake-key".to_string()),
"http://127.0.0.1:11434".to_string(),
None,
None,
None,
)
.unwrap();
assert!(local_client.is_ollama());
let no_key_client = OpenAiAiClient::new(
"llama2".to_string(),
None,
"http://remote-server.com".to_string(),
None,
None,
None,
)
.unwrap();
assert!(no_key_client.is_ollama());
let openai_client = OpenAiAiClient::new(
"gpt-4".to_string(),
Some("sk-real-key".to_string()),
"https://api.openai.com".to_string(),
None,
None,
None,
)
.unwrap();
assert!(!openai_client.is_ollama());
}
// "gpt-5" prefix matches both bare and suffixed model names.
#[test]
fn gpt5_series_gpt5_models() {
let client = OpenAiAiClient::new(
"gpt-5-preview".to_string(),
Some("key".to_string()),
"https://api.openai.com".to_string(),
None,
None,
None,
)
.unwrap();
assert!(client.is_gpt5_series());
let client2 = OpenAiAiClient::new(
"gpt-5".to_string(),
Some("key".to_string()),
"https://api.openai.com".to_string(),
None,
None,
None,
)
.unwrap();
assert!(client2.is_gpt5_series());
}
// o1 reasoning models are routed the same way as the GPT-5 family.
#[test]
fn gpt5_series_o1_models() {
let client = OpenAiAiClient::new(
"o1-mini".to_string(),
Some("key".to_string()),
"https://api.openai.com".to_string(),
None,
None,
None,
)
.unwrap();
assert!(client.is_gpt5_series());
let client2 = OpenAiAiClient::new(
"o1-preview".to_string(),
Some("key".to_string()),
"https://api.openai.com".to_string(),
None,
None,
None,
)
.unwrap();
assert!(client2.is_gpt5_series());
}
// gpt-4 / gpt-4o-mini must NOT be treated as GPT-5 series.
#[test]
fn gpt5_series_regular_models_not_matched() {
let client = OpenAiAiClient::new(
"gpt-4".to_string(),
Some("key".to_string()),
"https://api.openai.com".to_string(),
None,
None,
None,
)
.unwrap();
assert!(!client.is_gpt5_series());
let client2 = OpenAiAiClient::new(
"gpt-4o-mini".to_string(),
Some("key".to_string()),
"https://api.openai.com".to_string(),
None,
None,
None,
)
.unwrap();
assert!(!client2.is_gpt5_series());
}
// An explicitly configured max_tokens takes precedence over the registry.
#[test]
fn get_max_tokens_configured_value_wins() {
let client = OpenAiAiClient::new(
"gpt-4".to_string(),
Some("key".to_string()),
"https://api.openai.com".to_string(),
Some(8192),
None,
None,
)
.unwrap();
assert_eq!(client.get_max_tokens(), 8192);
}
// With no configured cap, the registry must supply a positive limit.
#[test]
fn get_max_tokens_from_registry() {
let client =
OpenAiAiClient::new_openai("gpt-4o".to_string(), "key".to_string(), None).unwrap();
let tokens = client.get_max_tokens();
assert!(tokens > 0, "expected positive token limit, got {tokens}");
}
// Hosted clients report the "OpenAI" provider.
#[test]
fn get_metadata_openai() {
let client =
OpenAiAiClient::new_openai("gpt-4o".to_string(), "key".to_string(), None).unwrap();
let metadata = client.get_metadata();
assert_eq!(metadata.provider, "OpenAI");
assert_eq!(metadata.model, "gpt-4o");
assert!(metadata.active_beta.is_none());
}
// Local clients report the "Ollama" provider.
#[test]
fn get_metadata_ollama() {
let client = OpenAiAiClient::new_ollama("llama2".to_string(), None, None).unwrap();
let metadata = client.get_metadata();
assert_eq!(metadata.provider, "Ollama");
assert_eq!(metadata.model, "llama2");
}
// The active beta (header, value) pair is passed through to metadata.
#[test]
fn get_metadata_with_beta() {
let beta = Some(("anthropic-beta".to_string(), "output-128k".to_string()));
let client =
OpenAiAiClient::new_openai("gpt-4o".to_string(), "key".to_string(), beta).unwrap();
let metadata = client.get_metadata();
assert!(metadata.active_beta.is_some());
let (key, value) = metadata.active_beta.unwrap();
assert_eq!(key, "anthropic-beta");
assert_eq!(value, "output-128k");
}
// GPT-5-style payload serializes max_completion_tokens and omits max_tokens
// (skip_serializing_if drops the None field entirely).
#[test]
fn request_gpt5_uses_max_completion_tokens() {
let request = OpenAiRequest {
model: "gpt-5".to_string(),
messages: vec![Message {
role: "user".to_string(),
content: "hello".to_string(),
}],
max_tokens: None,
max_completion_tokens: Some(4096),
temperature: None,
stream: false,
};
let json = serde_json::to_string(&request).unwrap();
assert!(json.contains("max_completion_tokens"));
assert!(!json.contains("\"max_tokens\""));
}
// Classic payload serializes max_tokens + temperature and omits
// max_completion_tokens.
#[test]
fn request_regular_model_uses_max_tokens() {
let request = OpenAiRequest {
model: "gpt-4".to_string(),
messages: vec![Message {
role: "user".to_string(),
content: "hello".to_string(),
}],
max_tokens: Some(4096),
max_completion_tokens: None,
temperature: Some(0.1),
stream: false,
};
let json = serde_json::to_string(&request).unwrap();
assert!(json.contains("\"max_tokens\""));
assert!(!json.contains("max_completion_tokens"));
assert!(json.contains("\"temperature\""));
}
}