nexo-llm 0.1.0

LLM provider clients (MiniMax, OpenAI-compatible, Anthropic, Gemini) with a rate limiter and a tool registry.
Documentation
use async_trait::async_trait;
use futures::stream::BoxStream;

use crate::stream::{default_stream_from_chat, StreamChunk};
use crate::types::{ChatRequest, ChatResponse};

#[async_trait]
pub trait LlmClient: Send + Sync {
    /// Runs a single chat request and resolves to the resulting
    /// [`ChatResponse`], or to an error if the call fails.
    async fn chat(&self, req: ChatRequest) -> anyhow::Result<ChatResponse>;

    /// Returns this client's model identifier.
    fn model_id(&self) -> &str;

    /// Low-cardinality provider tag ("minimax", "openai", "stub", ...),
    /// used as a Prometheus label. Implementations that do not override
    /// this method report `"unknown"`.
    fn provider(&self) -> &str {
        const FALLBACK_PROVIDER: &str = "unknown";
        FALLBACK_PROVIDER
    }

    /// Computes dense vector embeddings for a batch of input texts.
    ///
    /// Providers without an embedding endpoint (or where it is not wired
    /// yet) opt out simply by keeping this default.
    ///
    /// # Errors
    ///
    /// The default body always fails, naming the value of
    /// [`provider`](Self::provider) in the error message.
    async fn embed(&self, _texts: &[String]) -> anyhow::Result<Vec<Vec<f32>>> {
        let provider = self.provider();
        anyhow::bail!("embed() not supported by provider '{}'", provider)
    }

    /// Incremental response stream.
    ///
    /// The default hands the request to `default_stream_from_chat`, which
    /// goes through `chat()` and emits a single `TextDelta` (or `ToolCall*`
    /// triples) followed by `Usage` + `End`. Providers that speak SSE
    /// override this method to stream real token-level deltas.
    async fn stream<'a>(
        &'a self,
        req: ChatRequest,
    ) -> anyhow::Result<BoxStream<'a, anyhow::Result<StreamChunk>>> {
        // Build the chat-backed fallback first, then drive it to completion.
        let fallback = default_stream_from_chat(self, req);
        fallback.await
    }
}