// litellm-rs 0.1.1

//! Hugging Face provider implementation
//!
//! This module provides Hugging Face API integration.

use super::{BaseProvider, ModelPricing, Provider, ProviderError, ProviderType};
use crate::config::ProviderConfig;
use crate::core::models::{RequestContext, openai::*};
use crate::utils::error::Result;
use async_trait::async_trait;
use serde_json::json;
use std::collections::HashMap;
use tracing::{debug, info};

/// Hugging Face provider implementation.
///
/// Combines the shared [`BaseProvider`] state with a table of per-model
/// prices. The methods in this file read the provider name, API key, base URL
/// and HTTP client from `base`.
#[derive(Debug, Clone)]
pub struct HuggingFaceProvider {
    /// Base provider functionality: supplies the provider name, API key,
    /// base URL, HTTP client and JSON response parsing used by this provider.
    base: BaseProvider,
    /// Model pricing cache, keyed by model identifier (e.g. `org/model`).
    /// In the code visible here it is filled once by
    /// `initialize_pricing_cache` and only read afterwards.
    pricing_cache: HashMap<String, ModelPricing>,
}

impl HuggingFaceProvider {
    /// Builds a Hugging Face provider from `config`.
    ///
    /// The shared base provider is constructed first; its base URL is then
    /// replaced by `config.base_url`, or by the hosted Inference API endpoint
    /// when the configuration leaves it unset.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `BaseProvider::new`.
    pub async fn new(config: &ProviderConfig) -> Result<Self> {
        let shared = BaseProvider::new(config)?;

        // Fall back to the hosted Inference API when no URL is configured.
        let endpoint = config
            .base_url
            .as_deref()
            .unwrap_or("https://api-inference.huggingface.co")
            .to_owned();

        let base = BaseProvider {
            base_url: endpoint,
            ..shared
        };
        let pricing_cache = Self::initialize_pricing_cache();
        let provider = Self {
            base,
            pricing_cache,
        };

        info!(
            "Hugging Face provider '{}' initialized successfully",
            config.name
        );
        Ok(provider)
    }

    /// Builds the initial pricing table.
    ///
    /// Each model listed below is registered under its own identifier with
    /// zero input and output cost in USD, stamped with the current UTC time.
    fn initialize_pricing_cache() -> HashMap<String, ModelPricing> {
        // Models registered with zero cost.
        const FREE_MODELS: [&str; 2] = [
            "microsoft/DialoGPT-medium",
            "facebook/blenderbot-400M-distill",
        ];

        FREE_MODELS
            .iter()
            .map(|&model_id| {
                let pricing = ModelPricing {
                    model: model_id.to_string(),
                    input_cost_per_1k: 0.0,
                    output_cost_per_1k: 0.0,
                    currency: "USD".to_string(),
                    updated_at: chrono::Utc::now(),
                };
                (model_id.to_string(), pricing)
            })
            .collect()
    }

    /// Create request headers for Hugging Face API
    ///
    /// Always sets `Content-Type: application/json`. The `Authorization`
    /// bearer header is added only when the configured API key forms a valid
    /// HTTP header value. A key containing illegal bytes (for example a
    /// trailing newline) no longer panics: the header is left out, so the
    /// request is sent unauthenticated and the API's rejection is what the
    /// caller sees.
    fn create_headers(&self) -> reqwest::header::HeaderMap {
        let mut headers = reqwest::header::HeaderMap::new();

        // The API key comes from configuration, so it must not be able to
        // crash the process; `from_str` performs the same validation the
        // previous `parse().unwrap()` did, minus the panic.
        if let Ok(mut bearer) =
            reqwest::header::HeaderValue::from_str(&format!("Bearer {}", self.base.api_key))
        {
            // Keep the secret out of `Debug` output of the header map.
            bearer.set_sensitive(true);
            headers.insert(reqwest::header::AUTHORIZATION, bearer);
        }

        // A static literal is validated without a fallible runtime parse.
        headers.insert(
            reqwest::header::CONTENT_TYPE,
            reqwest::header::HeaderValue::from_static("application/json"),
        );

        headers
    }

    /// Flattens OpenAI-style chat messages into a single prompt string.
    ///
    /// Every message becomes one `"<Role>: <content>"` line and the lines are
    /// joined with `\n`. Multi-part content keeps only its text parts, joined
    /// by a single space; a message without content yields an empty body.
    fn convert_messages_to_hf(&self, messages: &[ChatMessage]) -> String {
        let mut lines: Vec<String> = Vec::with_capacity(messages.len());

        for message in messages {
            // Only assistant and system turns get their own label; the user
            // role and every other role are rendered as "User".
            let speaker = match message.role {
                MessageRole::Assistant => "Assistant",
                MessageRole::System => "System",
                _ => "User",
            };

            let body = match message.content.as_ref() {
                None => String::new(),
                Some(MessageContent::Text(text)) => text.clone(),
                Some(MessageContent::Parts(parts)) => {
                    // Non-text parts are dropped.
                    let mut fragments: Vec<&str> = Vec::new();
                    for part in parts.iter() {
                        if let ContentPart::Text { text } = part {
                            fragments.push(text.as_str());
                        }
                    }
                    fragments.join(" ")
                }
            };

            lines.push(format!("{}: {}", speaker, body));
        }

        lines.join("\n")
    }

    /// Wraps a raw Hugging Face JSON reply in an OpenAI-style chat completion.
    ///
    /// The generated text is read from a top-level `generated_text` string
    /// or, failing that, from `generated_text` of the first element when the
    /// reply is an array. Any other shape produces empty content. Token usage
    /// is always reported as zero and the finish reason is always `"stop"`.
    fn convert_hf_response_to_openai(
        &self,
        hf_response: serde_json::Value,
        model: &str,
    ) -> Result<ChatCompletionResponse> {
        let generated = hf_response
            .get("generated_text")
            .and_then(|value| value.as_str())
            .or_else(|| {
                // Array-shaped reply: only the first candidate is used.
                hf_response
                    .as_array()
                    .and_then(|candidates| candidates.first())
                    .and_then(|candidate| candidate.get("generated_text"))
                    .and_then(|value| value.as_str())
            })
            .unwrap_or("")
            .to_string();

        let reply = ChatMessage {
            role: MessageRole::Assistant,
            content: Some(MessageContent::Text(generated)),
            name: None,
            function_call: None,
            tool_calls: None,
            tool_call_id: None,
            audio: None,
        };

        let choice = ChatChoice {
            index: 0,
            message: reply,
            finish_reason: Some("stop".to_string()),
            logprobs: None,
        };

        // No token counts are read from the reply, so usage is zeroed.
        let usage = Usage {
            prompt_tokens: 0,
            completion_tokens: 0,
            total_tokens: 0,
            prompt_tokens_details: None,
            completion_tokens_details: None,
        };

        let completion = ChatCompletionResponse {
            id: format!("chatcmpl-hf-{}", uuid::Uuid::new_v4()),
            object: "chat.completion".to_string(),
            created: chrono::Utc::now().timestamp() as u64,
            model: model.to_string(),
            choices: vec![choice],
            usage: Some(usage),
            system_fingerprint: None,
        };

        Ok(completion)
    }
}

#[async_trait]
impl Provider for HuggingFaceProvider {
    /// Returns the provider name held by the base provider.
    fn name(&self) -> &str {
        let provider_name: &str = &self.base.name;
        provider_name
    }

    /// Identifies this provider as the custom type `"huggingface"`.
    fn provider_type(&self) -> ProviderType {
        let label = String::from("huggingface");
        ProviderType::Custom(label)
    }

    /// Reports whether `model` can be served by this provider.
    ///
    /// A model is accepted when the base provider lists it as supported, or
    /// when its identifier contains a `/` (the `org/model` naming format).
    async fn supports_model(&self, model: &str) -> bool {
        if self.base.is_model_supported(model) {
            return true;
        }

        // HF models typically have org/model format
        model.contains("/")
    }

    /// Image input is never advertised by this provider.
    async fn supports_images(&self) -> bool {
        // Most HF text models don't support images
        const IMAGES_SUPPORTED: bool = false;
        IMAGES_SUPPORTED
    }

    /// Embeddings are always advertised as available.
    async fn supports_embeddings(&self) -> bool {
        // HF has embedding models
        const EMBEDDINGS_SUPPORTED: bool = true;
        EMBEDDINGS_SUPPORTED
    }

    /// Streaming is never advertised by this provider.
    async fn supports_streaming(&self) -> bool {
        // HF Inference API doesn't support streaming by default
        const STREAMING_SUPPORTED: bool = false;
        STREAMING_SUPPORTED
    }

    /// Lists a fixed, curated set of Hugging Face models.
    ///
    /// No request is made to the API; every entry is owned by
    /// `"huggingface"` and stamped with the current UTC time.
    async fn list_models(&self) -> Result<Vec<Model>> {
        // HF has thousands of models, so only this curated list is returned.
        const CURATED_MODEL_IDS: [&str; 5] = [
            "microsoft/DialoGPT-medium",
            "facebook/blenderbot-400M-distill",
            "microsoft/DialoGPT-large",
            "facebook/blenderbot-1B-distill",
            "sentence-transformers/all-MiniLM-L6-v2",
        ];

        let mut catalog = Vec::with_capacity(CURATED_MODEL_IDS.len());
        for model_id in CURATED_MODEL_IDS.iter() {
            catalog.push(Model {
                id: (*model_id).to_string(),
                object: "model".to_string(),
                created: chrono::Utc::now().timestamp() as u64,
                owned_by: "huggingface".to_string(),
            });
        }

        Ok(catalog)
    }

    /// Reports the provider as healthy.
    ///
    /// Emits a debug trace and returns `Ok(())` unconditionally.
    async fn health_check(&self) -> Result<()> {
        debug!("Performing Hugging Face health check");
        // NOTE(review): no request is sent here, so this cannot detect an
        // unreachable API or an invalid key. A real probe against a
        // lightweight model is not implemented yet.
        Ok(())
    }

    /// Runs a chat completion against the Hugging Face Inference API.
    ///
    /// The chat messages are flattened into one prompt, posted to
    /// `{base_url}/models/{model}` with sampling parameters (defaults:
    /// 100 new tokens, temperature 1.0, top_p 1.0), and the reply is converted
    /// to an OpenAI-style chat completion.
    ///
    /// # Errors
    ///
    /// * `ProviderError::Network` when the request cannot be sent.
    /// * `ProviderError::Authentication` / `RateLimit` / `InvalidRequest` for
    ///   HTTP 401 / 429 / 400, and `ProviderError::Unknown` for any other
    ///   non-success status.
    /// * Whatever `parse_json_response` returns when the body cannot be parsed.
    async fn chat_completion(
        &self,
        request: ChatCompletionRequest,
        _context: RequestContext,
    ) -> Result<ChatCompletionResponse> {
        debug!("Hugging Face chat completion for model: {}", request.model);

        let prompt = self.convert_messages_to_hf(&request.messages);

        let generation_params = json!({
            "max_new_tokens": request.max_tokens.unwrap_or(100),
            "temperature": request.temperature.unwrap_or(1.0),
            "top_p": request.top_p.unwrap_or(1.0),
            "do_sample": true
        });
        let payload = json!({
            "inputs": prompt,
            "parameters": generation_params
        });

        let endpoint = format!("{}/models/{}", self.base.base_url, request.model);
        let pending = self
            .base
            .client
            .post(&endpoint)
            .headers(self.create_headers())
            .json(&payload);
        let http_response = pending
            .send()
            .await
            .map_err(|e| ProviderError::Network(e.to_string()))?;

        let status = http_response.status();
        if status.is_success() {
            let parsed: serde_json::Value =
                self.base.parse_json_response(http_response).await?;
            return self.convert_hf_response_to_openai(parsed, &request.model);
        }

        // Non-2xx: turn the status code into the matching provider error.
        let error_text = http_response.text().await.unwrap_or_default();
        let failure = match status.as_u16() {
            400 => ProviderError::InvalidRequest(error_text),
            401 => ProviderError::Authentication(error_text),
            429 => ProviderError::RateLimit(error_text),
            _ => ProviderError::Unknown(format!("HTTP {}: {}", status, error_text)),
        };
        Err(failure.into())
    }

    /// Runs a plain text completion against the Hugging Face Inference API.
    ///
    /// The prompt is posted to `{base_url}/models/{model}` with sampling
    /// parameters (defaults: 100 new tokens, temperature 1.0, top_p 1.0).
    /// The generated text is read from a top-level `generated_text` string
    /// or from the first element of an array reply; any other shape yields an
    /// empty completion. Token usage is always reported as zero.
    ///
    /// # Errors
    ///
    /// * `ProviderError::Network` when the request cannot be sent.
    /// * `ProviderError::Authentication` / `RateLimit` / `InvalidRequest` for
    ///   HTTP 401 / 429 / 400, and `ProviderError::Unknown` for any other
    ///   non-success status (same mapping as `chat_completion`).
    /// * Whatever `parse_json_response` returns when the body cannot be parsed.
    async fn completion(
        &self,
        request: CompletionRequest,
        _context: RequestContext,
    ) -> Result<CompletionResponse> {
        debug!("Hugging Face completion for model: {}", request.model);

        let body = json!({
            "inputs": request.prompt,
            "parameters": {
                "max_new_tokens": request.max_tokens.unwrap_or(100),
                "temperature": request.temperature.unwrap_or(1.0),
                "top_p": request.top_p.unwrap_or(1.0),
                "do_sample": true
            }
        });

        let url = format!("{}/models/{}", self.base.base_url, request.model);
        let response = self
            .base
            .client
            .post(&url)
            .headers(self.create_headers())
            .json(&body)
            .send()
            .await
            .map_err(|e| ProviderError::Network(e.to_string()))?;

        // Check the status before parsing: an error payload is still valid
        // JSON and would otherwise be turned into an empty "successful"
        // completion.
        let status = response.status();
        if !status.is_success() {
            let error_text = response.text().await.unwrap_or_default();

            return Err(match status.as_u16() {
                401 => ProviderError::Authentication(error_text),
                429 => ProviderError::RateLimit(error_text),
                400 => ProviderError::InvalidRequest(error_text),
                _ => ProviderError::Unknown(format!("HTTP {}: {}", status, error_text)),
            }
            .into());
        }

        let hf_response: serde_json::Value = self.base.parse_json_response(response).await?;

        // Accept either `{"generated_text": ...}` or `[{"generated_text": ...}, ...]`;
        // for the array form only the first candidate is used.
        let text = hf_response
            .get("generated_text")
            .and_then(|t| t.as_str())
            .or_else(|| {
                hf_response
                    .as_array()
                    .and_then(|outputs| outputs.first())
                    .and_then(|output| output.get("generated_text"))
                    .and_then(|t| t.as_str())
            })
            .unwrap_or("")
            .to_string();

        Ok(CompletionResponse {
            id: format!("cmpl-hf-{}", uuid::Uuid::new_v4()),
            object: "text_completion".to_string(),
            created: chrono::Utc::now().timestamp() as u64,
            model: request.model,
            choices: vec![CompletionChoice {
                text,
                index: 0,
                logprobs: None,
                finish_reason: Some("stop".to_string()),
            }],
            // No token counts are read from the reply, so usage is zeroed.
            usage: Some(Usage {
                prompt_tokens: 0,
                completion_tokens: 0,
                total_tokens: 0,
                prompt_tokens_details: None,
                completion_tokens_details: None,
            }),
        })
    }

    async fn embedding(
        &self,
        request: EmbeddingRequest,
        _context: RequestContext,
    ) -> Result<EmbeddingResponse> {
        debug!("Hugging Face embedding for model: {}", request.model);

        let body = json!({
            "inputs": request.input
        });

        let url = format!("{}/models/{}", self.base.base_url, request.model);
        let response = self
            .base
            .client
            .post(&url)
            .headers(self.create_headers())
            .json(&body)
            .send()
            .await
            .map_err(|e| ProviderError::Network(e.to_string()))?;

        let hf_response: serde_json::Value = self.base.parse_json_response(response).await?;

        // HF embedding response format varies by model
        let embeddings = if let Some(embedding_array) = hf_response.as_array() {
            embedding_array
                .iter()
                .enumerate()
                .map(|(index, embedding)| {
                    let embedding_vec = embedding
                        .as_array()
                        .unwrap_or(&vec![])
                        .iter()
                        .filter_map(|v| v.as_f64())
                        .collect();

                    EmbeddingObject {
                        object: "embedding".to_string(),
                        embedding: embedding_vec,
                        index: index as u32,
                    }
                })
                .collect()
        } else {
            vec![]
        };

        Ok(EmbeddingResponse {
            object: "list".to_string(),
            data: embeddings,
            model: request.model,
            usage: EmbeddingUsage {
                prompt_tokens: 0,
                total_tokens: 0,
            },
        })
    }

    /// Image generation is not available through this provider.
    ///
    /// # Errors
    ///
    /// Always returns `ProviderError::InvalidRequest`.
    async fn image_generation(
        &self,
        _request: ImageGenerationRequest,
        _context: RequestContext,
    ) -> Result<ImageGenerationResponse> {
        let unsupported = ProviderError::InvalidRequest(String::from(
            "Image generation not implemented for Hugging Face text models",
        ));
        Err(unsupported.into())
    }

    /// Looks up pricing for `model`.
    ///
    /// Returns a clone of the cached entry when one exists; otherwise a
    /// zero-cost USD entry is synthesised for the requested model, stamped
    /// with the current UTC time.
    async fn get_model_pricing(&self, model: &str) -> Result<ModelPricing> {
        let pricing = match self.pricing_cache.get(model) {
            Some(cached) => cached.clone(),
            // Most HF Inference API models are free
            None => ModelPricing {
                model: model.to_string(),
                input_cost_per_1k: 0.0,
                output_cost_per_1k: 0.0,
                currency: "USD".to_string(),
                updated_at: chrono::Utc::now(),
            },
        };

        Ok(pricing)
    }

    /// Returns the cost of a request, which is always `0.0`.
    ///
    /// The model name and token counts are ignored and the pricing cache is
    /// not consulted.
    async fn calculate_cost(
        &self,
        _model: &str,
        _input_tokens: u32,
        _output_tokens: u32,
    ) -> Result<f64> {
        // Most HF models are free
        const FREE_COST: f64 = 0.0;
        Ok(FREE_COST)
    }
}