litellm-rs 0.1.1

//! Fireworks AI provider implementation
//!
//! This module provides Fireworks AI API integration for fast inference.

use super::{BaseProvider, ModelPricing, Provider, ProviderError, ProviderType};
use crate::config::ProviderConfig;
use crate::core::models::{RequestContext, openai::*};
use crate::utils::error::Result;
use async_trait::async_trait;
use serde_json::json;
use std::collections::HashMap;
use tracing::{debug, info};

/// Fireworks AI provider implementation.
///
/// Combines the shared [`BaseProvider`] state with a static table of per-model
/// prices that is used for cost estimation.
#[derive(Debug, Clone)]
pub struct FireworksProvider {
    /// Base provider functionality; this file reads its `client`, `api_key`,
    /// `base_url` and `name` fields.
    base: BaseProvider,
    /// Model pricing cache, keyed by full model id and filled once in `new`
    /// via `initialize_pricing_cache`.
    pricing_cache: HashMap<String, ModelPricing>,
}

impl FireworksProvider {
    /// Build a Fireworks AI provider from the given configuration.
    ///
    /// The base provider is created from `config`; its base URL is then set to
    /// `config.base_url` when present, or to `https://api.fireworks.ai` otherwise.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `BaseProvider::new`.
    pub async fn new(config: &ProviderConfig) -> Result<Self> {
        let mut base = BaseProvider::new(config)?;

        // Explicitly configured URL wins; otherwise use the public Fireworks endpoint.
        base.base_url = match &config.base_url {
            Some(custom_url) => custom_url.clone(),
            None => "https://api.fireworks.ai".to_string(),
        };

        let pricing_cache = Self::initialize_pricing_cache();

        info!(
            "Fireworks AI provider '{}' initialized successfully",
            config.name
        );

        Ok(Self {
            base,
            pricing_cache,
        })
    }

    /// Build the static price table for the Fireworks AI models known to this provider.
    ///
    /// Returns a map keyed by full model id. Every entry is priced in USD per
    /// 1k tokens and stamped with the current UTC time.
    fn initialize_pricing_cache() -> HashMap<String, ModelPricing> {
        // (model id, input cost per 1k tokens, output cost per 1k tokens)
        const PRICE_TABLE: [(&str, f64, f64); 5] = [
            // Llama models
            ("accounts/fireworks/models/llama-v2-7b-chat", 0.0002, 0.0002),
            ("accounts/fireworks/models/llama-v2-13b-chat", 0.0003, 0.0003),
            ("accounts/fireworks/models/llama-v2-70b-chat", 0.0009, 0.0009),
            // Mixtral models
            (
                "accounts/fireworks/models/mixtral-8x7b-instruct",
                0.0005,
                0.0005,
            ),
            // Code models
            ("accounts/fireworks/models/starcoder-16b", 0.0005, 0.0005),
        ];

        PRICE_TABLE
            .iter()
            .map(|&(model_id, input_cost_per_1k, output_cost_per_1k)| {
                let pricing = ModelPricing {
                    model: model_id.to_string(),
                    input_cost_per_1k,
                    output_cost_per_1k,
                    currency: "USD".to_string(),
                    updated_at: chrono::Utc::now(),
                };
                (model_id.to_string(), pricing)
            })
            .collect()
    }

    /// Convert OpenAI-style chat messages into the JSON message objects sent to Fireworks.
    ///
    /// Each message becomes `{"role": ..., "content": ...}`:
    /// - `Tool` messages are sent under the `assistant` role.
    /// - Multi-part content is reduced to its text parts, joined with single spaces;
    ///   non-text parts are dropped.
    /// - Missing content becomes an empty string.
    fn convert_messages_to_fireworks(&self, messages: &[ChatMessage]) -> Vec<serde_json::Value> {
        let mut converted = Vec::with_capacity(messages.len());

        for msg in messages {
            let role = match msg.role {
                MessageRole::System => "system",
                MessageRole::User => "user",
                // Fireworks doesn't have tool role, so tool output is sent as assistant
                MessageRole::Assistant | MessageRole::Tool => "assistant",
                MessageRole::Function => "function",
            };

            let content = match msg.content.as_ref() {
                None => String::new(),
                Some(MessageContent::Text(text)) => text.clone(),
                Some(MessageContent::Parts(parts)) => {
                    // Fireworks doesn't support multimodal content: keep text parts only.
                    let mut text_parts: Vec<String> = Vec::new();
                    for part in parts.iter() {
                        if let ContentPart::Text { text } = part {
                            text_parts.push(text.clone());
                        }
                    }
                    text_parts.join(" ")
                }
            };

            converted.push(json!({
                "role": role,
                "content": content
            }));
        }

        converted
    }

    /// Convert Fireworks response to OpenAI format
    fn convert_fireworks_response_to_openai(
        &self,
        fireworks_response: serde_json::Value,
        model: &str,
    ) -> Result<ChatCompletionResponse> {
        let choices = fireworks_response
            .get("choices")
            .and_then(|c| c.as_array())
            .ok_or_else(|| ProviderError::Parsing("No choices in response".to_string()))?;

        let openai_choices: Result<Vec<ChatCompletionChoice>> = choices
            .iter()
            .enumerate()
            .map(|(index, choice)| {
                let message = choice
                    .get("message")
                    .ok_or_else(|| ProviderError::Parsing("No message in choice".to_string()))?;

                let role = message
                    .get("role")
                    .and_then(|r| r.as_str())
                    .map(|r| match r {
                        "assistant" => MessageRole::Assistant,
                        "user" => MessageRole::User,
                        "system" => MessageRole::System,
                        _ => MessageRole::Assistant,
                    })
                    .unwrap_or(MessageRole::Assistant);

                let content = message
                    .get("content")
                    .and_then(|c| c.as_str())
                    .unwrap_or("")
                    .to_string();

                let finish_reason = choice
                    .get("finish_reason")
                    .and_then(|fr| fr.as_str())
                    .map(|fr| fr.to_string());

                Ok(ChatCompletionChoice {
                    index: index as u32,
                    message: ChatMessage {
                        role,
                        content: Some(MessageContent::Text(content)),
                        name: None,
                        function_call: None,
                        tool_calls: None,
                        tool_call_id: None,
                        audio: None, // Fireworks doesn't support function calling
                    },
                    finish_reason,
                    logprobs: None,
                })
            })
            .collect();

        let usage = fireworks_response.get("usage").map(|u| Usage {
            prompt_tokens: u.get("prompt_tokens").and_then(|v| v.as_u64()).unwrap_or(0) as u32,
            completion_tokens: u
                .get("completion_tokens")
                .and_then(|v| v.as_u64())
                .unwrap_or(0) as u32,
            total_tokens: u.get("total_tokens").and_then(|v| v.as_u64()).unwrap_or(0) as u32,
            prompt_tokens_details: None,
            completion_tokens_details: None,
        });

        Ok(ChatCompletionResponse {
            id: fireworks_response
                .get("id")
                .and_then(|id| id.as_str())
                .unwrap_or(&format!("chatcmpl-fireworks-{}", uuid::Uuid::new_v4()))
                .to_string(),
            object: "chat.completion".to_string(),
            created: fireworks_response
                .get("created")
                .and_then(|c| c.as_u64())
                .unwrap_or_else(|| chrono::Utc::now().timestamp() as u64),
            model: model.to_string(),
            choices: openai_choices?
                .into_iter()
                .map(|choice| ChatChoice {
                    index: choice.index,
                    message: choice.message,
                    logprobs: choice.logprobs.map(|_| Logprobs { content: None }),
                    finish_reason: choice.finish_reason,
                })
                .collect(),
            usage,
            system_fingerprint: None,
        })
    }
}

#[async_trait]
impl Provider for FireworksProvider {
    /// Returns the provider name stored on the underlying base provider.
    fn name(&self) -> &str {
        &self.base.name
    }

    /// Identifies this provider as the custom provider type `"fireworks"`.
    fn provider_type(&self) -> ProviderType {
        let kind = String::from("fireworks");
        ProviderType::Custom(kind)
    }

    /// Reports whether `model` is handled by this provider.
    ///
    /// A model is accepted when the base provider lists it as supported, or when
    /// its name contains one of the substrings `fireworks`, `llama`, `mixtral`
    /// or `starcoder`.
    async fn supports_model(&self, model: &str) -> bool {
        // Substrings that mark a model name as belonging to this provider.
        const FAMILY_MARKERS: [&str; 4] = ["fireworks", "llama", "mixtral", "starcoder"];

        if self.base.is_model_supported(model) {
            return true;
        }

        FAMILY_MARKERS.iter().any(|marker| model.contains(*marker))
    }

    /// Reports that image generation is unavailable through this provider.
    async fn supports_images(&self) -> bool {
        false // `image_generation` in this impl always returns an `InvalidRequest` error
    }

    /// Reports that embeddings are unavailable through this provider.
    async fn supports_embeddings(&self) -> bool {
        false // `embedding` in this impl always returns an `InvalidRequest` error
    }

    /// Reports that streaming is supported.
    async fn supports_streaming(&self) -> bool {
        true // Fireworks supports streaming
    }

    /// List the models available from Fireworks AI.
    ///
    /// Queries `{base_url}/inference/v1/models`. On a successful response, every entry
    /// of the `data` array that has string `id`, `object`, `owned_by` fields and an
    /// unsigned integer `created` field is returned; incomplete entries are skipped.
    /// On any non-success HTTP status a built-in list of known models is returned instead.
    ///
    /// # Errors
    ///
    /// Returns `ProviderError::Network` when the request cannot be sent, and propagates
    /// errors from `parse_json_response`.
    async fn list_models(&self) -> Result<Vec<Model>> {
        // Models reported when the listing endpoint answers with an error status.
        const FALLBACK_MODEL_IDS: [&str; 5] = [
            "accounts/fireworks/models/llama-v2-7b-chat",
            "accounts/fireworks/models/llama-v2-13b-chat",
            "accounts/fireworks/models/llama-v2-70b-chat",
            "accounts/fireworks/models/mixtral-8x7b-instruct",
            "accounts/fireworks/models/starcoder-16b",
        ];

        let endpoint = format!("{}/inference/v1/models", self.base.base_url);
        let bearer = format!("Bearer {}", self.base.api_key);

        let http_response = self
            .base
            .client
            .get(&endpoint)
            .header("Authorization", bearer)
            .send()
            .await
            .map_err(|e| ProviderError::Network(e.to_string()))?;

        if http_response.status().is_success() {
            let payload: serde_json::Value =
                self.base.parse_json_response(http_response).await?;

            // Entries missing any required field yield `None` and are skipped.
            let parse_entry = |entry: &serde_json::Value| -> Option<Model> {
                Some(Model {
                    id: entry.get("id")?.as_str()?.to_string(),
                    object: entry.get("object")?.as_str()?.to_string(),
                    created: entry.get("created")?.as_u64()?,
                    owned_by: entry.get("owned_by")?.as_str()?.to_string(),
                })
            };

            let mut listed = Vec::new();
            if let Some(entries) = payload.get("data").and_then(|d| d.as_array()) {
                for entry in entries {
                    if let Some(parsed) = parse_entry(entry) {
                        listed.push(parsed);
                    }
                }
            }

            return Ok(listed);
        }

        // Fallback to known models if API call fails
        let mut fallback = Vec::with_capacity(FALLBACK_MODEL_IDS.len());
        for model_id in FALLBACK_MODEL_IDS {
            fallback.push(Model {
                id: model_id.to_string(),
                object: "model".to_string(),
                created: chrono::Utc::now().timestamp() as u64,
                owned_by: "fireworks".to_string(),
            });
        }

        Ok(fallback)
    }

    /// Check that the Fireworks AI API is reachable with the configured credentials.
    ///
    /// Issues an authenticated GET to `{base_url}/inference/v1/models` and succeeds
    /// when the response status is a success code.
    ///
    /// # Errors
    ///
    /// Returns `ProviderError::Network` when the request cannot be sent and
    /// `ProviderError::Unknown` (with the HTTP status) on a non-success response.
    async fn health_check(&self) -> Result<()> {
        debug!("Performing Fireworks AI health check");

        let endpoint = format!("{}/inference/v1/models", self.base.base_url);
        let bearer = format!("Bearer {}", self.base.api_key);

        let probe = self
            .base
            .client
            .get(&endpoint)
            .header("Authorization", bearer)
            .send()
            .await
            .map_err(|e| ProviderError::Network(e.to_string()))?;

        let status = probe.status();
        if !status.is_success() {
            let failure = ProviderError::Unknown(format!("Health check failed: {}", status));
            return Err(failure.into());
        }

        Ok(())
    }

    /// Send a non-streaming chat completion request to Fireworks AI.
    ///
    /// Messages are converted with `convert_messages_to_fireworks`, posted to
    /// `{base_url}/inference/v1/chat/completions`, and the JSON reply is converted with
    /// `convert_fireworks_response_to_openai`. `max_tokens`, `temperature`, `top_p`
    /// and `stop` are forwarded when set.
    ///
    /// The `stream` flag is never forwarded as `true`: this method parses the reply
    /// as a single JSON document, which a streamed (server-sent events) reply is not.
    /// When the caller sets `stream` at all, `"stream": false` is sent explicitly.
    ///
    /// # Errors
    ///
    /// - `ProviderError::Network` when the request cannot be sent.
    /// - `Authentication` (401), `RateLimit` (429), `ModelNotFound` (404),
    ///   `InvalidRequest` (400) or `Unknown` (any other non-success status), each
    ///   carrying the response body text.
    /// - Any error from `parse_json_response` or the response conversion.
    async fn chat_completion(
        &self,
        request: ChatCompletionRequest,
        _context: RequestContext,
    ) -> Result<ChatCompletionResponse> {
        debug!("Fireworks AI chat completion for model: {}", request.model);

        let messages = self.convert_messages_to_fireworks(&request.messages);

        let mut body = json!({
            "model": request.model,
            "messages": messages
        });

        // Add optional parameters
        if let Some(max_tokens) = request.max_tokens {
            body["max_tokens"] = json!(max_tokens);
        }
        if let Some(temperature) = request.temperature {
            body["temperature"] = json!(temperature);
        }
        if let Some(top_p) = request.top_p {
            body["top_p"] = json!(top_p);
        }
        // A streamed reply cannot be parsed as one JSON document below, so the
        // flag is pinned to `false` whenever the caller supplied it.
        if request.stream.is_some() {
            body["stream"] = json!(false);
        }
        if let Some(stop) = request.stop {
            body["stop"] = json!(stop);
        }

        let url = format!("{}/inference/v1/chat/completions", self.base.base_url);

        let response = self
            .base
            .client
            .post(&url)
            .header("Authorization", format!("Bearer {}", self.base.api_key))
            .header("Content-Type", "application/json")
            .json(&body)
            .send()
            .await
            .map_err(|e| ProviderError::Network(e.to_string()))?;

        if !response.status().is_success() {
            let status = response.status();
            // Best effort: an unreadable error body is reported as an empty message.
            let error_text = response.text().await.unwrap_or_default();

            return Err(match status.as_u16() {
                401 => ProviderError::Authentication(error_text),
                429 => ProviderError::RateLimit(error_text),
                404 => ProviderError::ModelNotFound(error_text),
                400 => ProviderError::InvalidRequest(error_text),
                _ => ProviderError::Unknown(format!("HTTP {}: {}", status, error_text)),
            }
            .into());
        }

        let fireworks_response: serde_json::Value = self.base.parse_json_response(response).await?;
        self.convert_fireworks_response_to_openai(fireworks_response, &request.model)
    }

    /// Send a non-streaming text completion request to Fireworks AI.
    ///
    /// Posts the prompt to `{base_url}/inference/v1/completions` and maps the JSON
    /// reply onto a `CompletionResponse`. `max_tokens`, `temperature`, `top_p` and
    /// `stop` are forwarded when set.
    ///
    /// The `stream` flag is never forwarded as `true`: the reply is parsed as a single
    /// JSON document, which a streamed (server-sent events) reply is not. When the
    /// caller sets `stream` at all, `"stream": false` is sent explicitly.
    ///
    /// Response handling:
    /// - a missing or non-array `choices` yields an empty choice list;
    /// - missing `text` becomes an empty string;
    /// - token counters default to `0` and saturate at `u32::MAX`;
    /// - `id` falls back to a generated `cmpl-fireworks-<uuid>` (built only when
    ///   needed) and `created` to the current UTC timestamp.
    ///
    /// # Errors
    ///
    /// - `ProviderError::Network` when the request cannot be sent.
    /// - `Authentication` (401), `RateLimit` (429), `ModelNotFound` (404),
    ///   `InvalidRequest` (400) or `Unknown` (any other non-success status), each
    ///   carrying the response body text.
    /// - Any error from `parse_json_response`.
    async fn completion(
        &self,
        request: CompletionRequest,
        _context: RequestContext,
    ) -> Result<CompletionResponse> {
        debug!("Fireworks AI completion for model: {}", request.model);

        let mut body = json!({
            "model": request.model,
            "prompt": request.prompt
        });

        // Add optional parameters
        if let Some(max_tokens) = request.max_tokens {
            body["max_tokens"] = json!(max_tokens);
        }
        if let Some(temperature) = request.temperature {
            body["temperature"] = json!(temperature);
        }
        if let Some(top_p) = request.top_p {
            body["top_p"] = json!(top_p);
        }
        // A streamed reply cannot be parsed as one JSON document below, so the
        // flag is pinned to `false` whenever the caller supplied it.
        if request.stream.is_some() {
            body["stream"] = json!(false);
        }
        if let Some(stop) = request.stop {
            body["stop"] = json!(stop);
        }

        let url = format!("{}/inference/v1/completions", self.base.base_url);

        let response = self
            .base
            .client
            .post(&url)
            .header("Authorization", format!("Bearer {}", self.base.api_key))
            .header("Content-Type", "application/json")
            .json(&body)
            .send()
            .await
            .map_err(|e| ProviderError::Network(e.to_string()))?;

        if !response.status().is_success() {
            let status = response.status();
            // Best effort: an unreadable error body is reported as an empty message.
            let error_text = response.text().await.unwrap_or_default();

            return Err(match status.as_u16() {
                401 => ProviderError::Authentication(error_text),
                429 => ProviderError::RateLimit(error_text),
                404 => ProviderError::ModelNotFound(error_text),
                400 => ProviderError::InvalidRequest(error_text),
                _ => ProviderError::Unknown(format!("HTTP {}: {}", status, error_text)),
            }
            .into());
        }

        let fireworks_response: serde_json::Value = self.base.parse_json_response(response).await?;

        // Reads a token counter, defaulting to 0 and saturating at u32::MAX so an
        // oversized value cannot silently wrap around.
        let token_count = |usage: &serde_json::Value, key: &str| -> u32 {
            usage
                .get(key)
                .and_then(|v| v.as_u64())
                .map_or(0, |n| n.min(u64::from(u32::MAX)) as u32)
        };

        let choices = fireworks_response
            .get("choices")
            .and_then(|c| c.as_array())
            .unwrap_or(&vec![])
            .iter()
            .enumerate()
            .map(|(index, choice)| CompletionChoice {
                text: choice
                    .get("text")
                    .and_then(|t| t.as_str())
                    .unwrap_or("")
                    .to_string(),
                index: index as u32,
                logprobs: None,
                finish_reason: choice
                    .get("finish_reason")
                    .and_then(|fr| fr.as_str())
                    .map(|s| s.to_string()),
            })
            .collect();

        let usage = fireworks_response.get("usage").map(|u| Usage {
            prompt_tokens: token_count(u, "prompt_tokens"),
            completion_tokens: token_count(u, "completion_tokens"),
            total_tokens: token_count(u, "total_tokens"),
            prompt_tokens_details: None,
            completion_tokens_details: None,
        });

        // Lazily build the fallback id: no UUID is generated when the payload has one.
        let id = fireworks_response
            .get("id")
            .and_then(|id| id.as_str())
            .map(str::to_owned)
            .unwrap_or_else(|| format!("cmpl-fireworks-{}", uuid::Uuid::new_v4()));

        let created = fireworks_response
            .get("created")
            .and_then(|c| c.as_u64())
            .unwrap_or_else(|| chrono::Utc::now().timestamp() as u64);

        Ok(CompletionResponse {
            id,
            object: "text_completion".to_string(),
            created,
            model: request.model,
            choices,
            usage,
        })
    }

    /// Embeddings are not implemented for this provider.
    ///
    /// # Errors
    ///
    /// Always returns `ProviderError::InvalidRequest`.
    async fn embedding(
        &self,
        _request: EmbeddingRequest,
        _context: RequestContext,
    ) -> Result<EmbeddingResponse> {
        let unsupported =
            ProviderError::InvalidRequest(String::from("Embeddings not supported by Fireworks AI"));
        Err(unsupported.into())
    }

    /// Image generation is not implemented for this provider.
    ///
    /// # Errors
    ///
    /// Always returns `ProviderError::InvalidRequest`.
    async fn image_generation(
        &self,
        _request: ImageGenerationRequest,
        _context: RequestContext,
    ) -> Result<ImageGenerationResponse> {
        let unsupported = ProviderError::InvalidRequest(String::from(
            "Image generation not supported by Fireworks AI",
        ));
        Err(unsupported.into())
    }

    /// Look up the pricing for `model`.
    ///
    /// Returns a clone of the cached entry when the model id is in the pricing cache.
    /// Unknown models get a default of 0.0005 USD per 1k tokens for both input and
    /// output, stamped with the current UTC time. This method never returns an error.
    async fn get_model_pricing(&self, model: &str) -> Result<ModelPricing> {
        let pricing = match self.pricing_cache.get(model) {
            Some(cached) => cached.clone(),
            // Return default pricing for unknown models
            None => ModelPricing {
                model: model.to_string(),
                input_cost_per_1k: 0.0005,
                output_cost_per_1k: 0.0005,
                currency: "USD".to_string(),
                updated_at: chrono::Utc::now(),
            },
        };

        Ok(pricing)
    }

    /// Estimate the cost of a request from its token counts.
    ///
    /// The cost is `input_tokens / 1000 * input_cost_per_1k` plus
    /// `output_tokens / 1000 * output_cost_per_1k`, using the rates returned by
    /// `get_model_pricing` for `model`.
    ///
    /// # Errors
    ///
    /// Propagates any error from `get_model_pricing`.
    async fn calculate_cost(
        &self,
        model: &str,
        input_tokens: u32,
        output_tokens: u32,
    ) -> Result<f64> {
        let rates = self.get_model_pricing(model).await?;

        // Prices are quoted per 1k tokens, so scale the counts first.
        let input_thousands = f64::from(input_tokens) / 1000.0;
        let output_thousands = f64::from(output_tokens) / 1000.0;

        Ok(input_thousands * rates.input_cost_per_1k + output_thousands * rates.output_cost_per_1k)
    }
}