// llmkit-core 0.1.0

//! The [`LlmProvider`] trait — one call site regardless of backend.

use async_trait::async_trait;

use crate::error::LlmResult;
use crate::stream::ChatStream;
use crate::types::{ChatRequest, ChatResponse, EmbedRequest, EmbedResponse};
use crate::usage::CostEstimate;

/// A unified, async LLM backend.
///
/// Implemented by each provider adapter (OpenAI, Anthropic, Ollama) and by the
/// Tower-wrapped client, so middleware and fallback chains compose transparently.
///
/// The `Send + Sync + 'static` supertraits require every implementor to be
/// shareable across threads and free of non-`'static` borrows. The async
/// methods are declared through the `#[async_trait]` attribute on this trait;
/// implementations are expected to carry the same attribute.
#[async_trait]
pub trait LlmProvider: Send + Sync + 'static {
    /// Single-shot chat completion.
    ///
    /// Takes ownership of `req` and resolves to a [`ChatResponse`] on success,
    /// or to the error carried by [`LlmResult`] otherwise.
    async fn chat(&self, req: ChatRequest) -> LlmResult<ChatResponse>;

    /// Streaming chat completion.
    ///
    /// Takes ownership of `req` and resolves to a [`ChatStream`] on success;
    /// what the stream yields is defined by that type.
    async fn chat_stream(&self, req: ChatRequest) -> LlmResult<ChatStream>;

    /// Generate embeddings.
    ///
    /// Takes ownership of `req` and resolves to an [`EmbedResponse`] on success.
    async fn embed(&self, req: EmbedRequest) -> LlmResult<EmbedResponse>;

    /// Provider name ("openai" | "anthropic" | "ollama").
    ///
    /// Returned as a `'static` string, so the value does not borrow from `self`.
    fn name(&self) -> &'static str;

    /// Active default model slug.
    ///
    /// The returned string slice borrows from `self`.
    fn model(&self) -> &str;

    /// Pre-flight cost estimate (no network call). `None` if unknown.
    ///
    /// The default implementation ignores the request and always returns
    /// `None`; implementors may override it to supply a real estimate.
    fn estimate_cost(&self, _req: &ChatRequest) -> Option<CostEstimate> {
        None
    }
}

/// Forwarding implementation for shared handles: an `Arc<T>` is itself an
/// [`LlmProvider`] whenever `T` is one.
///
/// `?Sized` lets `T` be an unsized type (for example a trait object). Every
/// method delegates to the value behind the `Arc` — including `estimate_cost`,
/// which would otherwise fall back to the trait's default of `None`.
#[async_trait]
impl<T> LlmProvider for std::sync::Arc<T>
where
    T: LlmProvider + ?Sized,
{
    /// Delegates to the inner provider's `chat`.
    async fn chat(&self, req: ChatRequest) -> LlmResult<ChatResponse> {
        T::chat(&**self, req).await
    }

    /// Delegates to the inner provider's `chat_stream`.
    async fn chat_stream(&self, req: ChatRequest) -> LlmResult<ChatStream> {
        T::chat_stream(&**self, req).await
    }

    /// Delegates to the inner provider's `embed`.
    async fn embed(&self, req: EmbedRequest) -> LlmResult<EmbedResponse> {
        T::embed(&**self, req).await
    }

    /// Reports the inner provider's name.
    fn name(&self) -> &'static str {
        T::name(&**self)
    }

    /// Reports the inner provider's model slug.
    fn model(&self) -> &str {
        T::model(&**self)
    }

    /// Forwards the request to the inner provider's cost estimator.
    fn estimate_cost(&self, req: &ChatRequest) -> Option<CostEstimate> {
        T::estimate_cost(&**self, req)
    }
}