// cognate-core 0.1.0

//! Cognate Core — HTTP client, traits, and base types for LLM providers.
//!
//! This crate provides the foundational abstractions for building
//! provider-agnostic LLM applications with type-safe interfaces and
//! zero-cost abstractions.
//!
//! # Quick start
//!
//! ```rust,no_run
//! use cognate_core::{Provider, Request, Message};
//!
//! async fn run<P: Provider>(provider: &P) -> cognate_core::Result<()> {
//!     let response = provider
//!         .complete(
//!             Request::new()
//!                 .with_model("gpt-4o-mini")
//!                 .with_message(Message::user("Hello!")),
//!         )
//!         .await?;
//!     println!("{}", response.content());
//!     Ok(())
//! }
//! ```
#![warn(missing_docs)]

use async_trait::async_trait;
use futures::stream::BoxStream;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

pub mod error;
pub mod middleware;
pub mod mock;
pub mod ratelimit;
pub mod types;

pub use error::{Error, Result};
pub use middleware::{Layer, Middleware, ProviderExt};
pub use mock::MockProvider;
pub use ratelimit::TokenBucket;

// ─── Provider trait ────────────────────────────────────────────────────────

/// Core trait for all LLM providers.
///
/// Implement this trait to add support for a new LLM provider.
/// The trait is object-safe and supports both full completion and
/// streaming responses.
///
/// Both methods borrow the provider (`&self`) and take the [`Request`] by
/// value. The `Send + Sync` supertraits mean every implementor can be shared
/// between threads.
///
/// # Example
///
/// ```rust,no_run
/// use cognate_core::{Provider, Request, Message};
///
/// async fn example<P: Provider>(provider: &P) -> cognate_core::Result<()> {
///     let request = Request::new()
///         .with_model("gpt-4o")
///         .with_messages(vec![
///             Message::system("You are a helpful assistant"),
///             Message::user("Hello!"),
///         ]);
///
///     let response = provider.complete(request).await?;
///     println!("{}", response.content());
///     Ok(())
/// }
/// ```
#[async_trait]
pub trait Provider: Send + Sync {
    /// Send a completion request and wait for the full response.
    ///
    /// The request is consumed. Failures are reported through the crate-level
    /// [`Result`] alias.
    async fn complete(&self, req: Request) -> Result<Response>;

    /// Send a completion request and return a streaming response.
    ///
    /// Returns a stream of [`Chunk`]s as they are generated by the provider.
    ///
    /// There are two layers of `Result`: the outer one reports a failure to
    /// obtain the stream at all, while each stream item is itself a `Result`
    /// so that an error can also be reported part-way through the stream.
    /// The stream is `'static`, so it cannot borrow from `self`.
    ///
    /// # Example
    ///
    /// ```rust,no_run
    /// use cognate_core::{Provider, Request, Message};
    /// use futures::StreamExt;
    ///
    /// async fn stream_example<P: Provider>(provider: &P) -> cognate_core::Result<()> {
    ///     let mut stream = provider
    ///         .stream(Request::new().with_model("gpt-4o").with_message(Message::user("Hi")))
    ///         .await?;
    ///     while let Some(chunk) = stream.next().await {
    ///         print!("{}", chunk?.content());
    ///     }
    ///     Ok(())
    /// }
    /// ```
    async fn stream(&self, req: Request) -> Result<BoxStream<'static, Result<Chunk>>>;
}

/// Trait for providers that can generate embedding vectors.
///
/// Implement this alongside [`Provider`] if your backend supports embeddings.
/// The trait is independent of [`Provider`]: it has no supertrait other than
/// `Send + Sync`, so a type may implement either one or both.
///
/// # Example
///
/// ```rust,no_run
/// use cognate_core::EmbeddingProvider;
///
/// async fn embed<E: EmbeddingProvider>(embedder: &E) -> cognate_core::Result<()> {
///     let vecs = embedder.embed(vec!["Hello world".to_string()]).await?;
///     println!("Embedding dimension: {}", vecs[0].len());
///     Ok(())
/// }
/// ```
#[async_trait]
pub trait EmbeddingProvider: Send + Sync {
    /// Generate embedding vectors for the given list of input strings.
    ///
    /// Takes ownership of `inputs`. Implementations are expected to return
    /// one `Vec<f32>` per input, in the same order as `inputs`; failures are
    /// reported through the crate-level [`Result`] alias.
    async fn embed(&self, inputs: Vec<String>) -> Result<Vec<Vec<f32>>>;
}

// ─── Message / Role ────────────────────────────────────────────────────────

/// A single message in a conversation.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct Message {
    /// The role of the message sender.
    pub role: Role,
    /// The text content of the message.
    ///
    /// For assistant messages that contain only tool calls, this may be empty.
    pub content: String,
    /// Optional name used to distinguish multiple participants of the same role.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub name: Option<String>,
    /// Tool calls requested by the assistant, if any.
    ///
    /// Present on messages with `role = Assistant` when the model wants to
    /// invoke one or more tools.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_calls: Option<Vec<ToolCall>>,
    /// The tool-call ID this message is responding to.
    ///
    /// Must be set on messages with `role = Tool` so the provider can
    /// correlate the result with the originating call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_call_id: Option<String>,
}

/// The role of a message sender.
///
/// Every variant is serialized as its lowercase name (`"system"`, `"user"`,
/// `"assistant"`, `"function"`, `"tool"`) via `rename_all = "lowercase"`.
/// The enum is fieldless, so it also derives `Eq` and `Hash` and can be used
/// as a map or set key.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    /// A high-level instruction that shapes the assistant's behaviour.
    System,
    /// A message from the human end of the conversation.
    User,
    /// A message generated by the assistant.
    Assistant,
    /// A legacy function-call result (OpenAI function-calling v1).
    Function,
    /// A tool-call result sent back by the client.
    Tool,
}

impl Message {
    /// Create a system message.
    pub fn system(content: impl Into<String>) -> Self {
        Self {
            role: Role::System,
            content: content.into(),
            name: None,
            tool_calls: None,
            tool_call_id: None,
        }
    }

    /// Create a user message.
    pub fn user(content: impl Into<String>) -> Self {
        Self {
            role: Role::User,
            content: content.into(),
            name: None,
            tool_calls: None,
            tool_call_id: None,
        }
    }

    /// Create an assistant message.
    pub fn assistant(content: impl Into<String>) -> Self {
        Self {
            role: Role::Assistant,
            content: content.into(),
            name: None,
            tool_calls: None,
            tool_call_id: None,
        }
    }

    /// Create a tool-result message.
    ///
    /// `tool_call_id` must match the `id` of the [`ToolCall`] being answered.
    pub fn tool_result(content: impl Into<String>, tool_call_id: impl Into<String>) -> Self {
        Self {
            role: Role::Tool,
            content: content.into(),
            name: None,
            tool_calls: None,
            tool_call_id: Some(tool_call_id.into()),
        }
    }
}

// ─── Tool calling ──────────────────────────────────────────────────────────

/// A tool invocation requested by the model.
///
/// Instances appear in [`Message::tool_calls`]; the reply to a call is a
/// message whose `tool_call_id` equals this call's `id`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ToolCall {
    /// Unique identifier for this call, used to correlate the result.
    pub id: String,
    /// The type of call — currently always `"function"`.
    ///
    /// Serialized under the key `type` (a reserved word in Rust, hence the
    /// different field name). The value is a free-form string and is not
    /// validated by this type.
    #[serde(rename = "type")]
    pub call_type: String,
    /// The function the model wants to call.
    pub function: ToolCallFunction,
}

/// The function component of a [`ToolCall`].
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ToolCallFunction {
    /// Name of the function to invoke.
    pub name: String,
    /// JSON-encoded arguments string, e.g. `"{\"query\":\"Rust\"}"`.
    ///
    /// Kept as a raw string: this type neither parses nor validates it, so
    /// callers must decode it themselves.
    pub arguments: String,
}

// ─── Request ───────────────────────────────────────────────────────────────

/// A completion request sent to a provider.
///
/// `Default` yields an empty `model`, no messages, every optional field set
/// to `None` and an empty `extra` map. Optional fields that are `None` (and
/// `extra` when empty) are omitted from the serialized form. None of the
/// documented value ranges are enforced by this type.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct Request {
    /// The model identifier, e.g. `"gpt-4o"` or `"claude-3-5-sonnet-20241022"`.
    pub model: String,
    /// The conversation history, including any system prompt.
    pub messages: Vec<Message>,
    /// Sampling temperature in `[0.0, 2.0]`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,
    /// Maximum tokens to generate.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_tokens: Option<u32>,
    /// Nucleus sampling parameter.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,
    /// Frequency penalty in `[-2.0, 2.0]` (OpenAI only).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub frequency_penalty: Option<f32>,
    /// Presence penalty in `[-2.0, 2.0]` (OpenAI only).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub presence_penalty: Option<f32>,
    /// Stop sequences.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stop: Option<Vec<String>>,
    /// Whether to stream the response. Providers handle this internally.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stream: Option<bool>,
    /// Structured output format (`json_object` etc.).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_format: Option<ResponseFormat>,
    /// Provider-specific extra parameters (e.g. `tools`, `tool_choice`).
    ///
    /// Serialized as a nested `extra` object (the field is not flattened).
    /// Left out entirely when empty, and defaults to an empty map when the
    /// key is absent on deserialization.
    #[serde(skip_serializing_if = "HashMap::is_empty", default)]
    pub extra: HashMap<String, serde_json::Value>,
}

impl Request {
    /// Build an empty request; equivalent to [`Request::default`].
    pub fn new() -> Self {
        Default::default()
    }

    /// Return the request with `model` as its model identifier.
    pub fn with_model(self, model: impl Into<String>) -> Self {
        Self {
            model: model.into(),
            ..self
        }
    }

    /// Return the request with its whole message list replaced by `messages`.
    pub fn with_messages(self, messages: Vec<Message>) -> Self {
        Self { messages, ..self }
    }

    /// Return the request with `message` added to the end of the message list.
    pub fn with_message(mut self, message: Message) -> Self {
        self.messages.push(message);
        self
    }

    /// Return the request with the sampling temperature set.
    pub fn with_temperature(self, temperature: f32) -> Self {
        Self {
            temperature: Some(temperature),
            ..self
        }
    }

    /// Return the request with a cap on the number of generated tokens.
    pub fn with_max_tokens(self, max_tokens: u32) -> Self {
        Self {
            max_tokens: Some(max_tokens),
            ..self
        }
    }

    /// Return the request with the `top_p` nucleus sampling parameter set.
    pub fn with_top_p(self, top_p: f32) -> Self {
        Self {
            top_p: Some(top_p),
            ..self
        }
    }

    /// Return the request with JSON mode enabled, i.e. `response_format`
    /// set to [`ResponseFormat::json_object`].
    pub fn with_json_mode(self) -> Self {
        Self {
            response_format: Some(ResponseFormat::json_object()),
            ..self
        }
    }

    /// Return the request with a provider-specific parameter stored in `extra`.
    ///
    /// A value already stored under the same key is replaced.
    pub fn with_extra(
        mut self,
        key: impl Into<String>,
        value: impl Into<serde_json::Value>,
    ) -> Self {
        let (name, json) = (key.into(), value.into());
        self.extra.insert(name, json);
        self
    }
}

// ─── ResponseFormat ────────────────────────────────────────────────────────

/// Structured output format specifier.
///
/// Serializes to an object with a single `type` key.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResponseFormat {
    /// The format type — e.g. `"json_object"`.
    ///
    /// Serialized under the key `type` (a reserved word in Rust, hence the
    /// different field name). Any string is accepted; no validation is done.
    #[serde(rename = "type")]
    pub format_type: String,
}

impl ResponseFormat {
    /// Build the format specifier whose type is `"json_object"`.
    pub fn json_object() -> Self {
        let format_type = String::from("json_object");
        Self { format_type }
    }
}

// ─── Response / Choice / Usage / Chunk ────────────────────────────────────

/// A completed response from a provider.
///
/// The accessor methods on this type read from the first entry of `choices`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Response {
    /// Provider-assigned response identifier.
    pub id: String,
    /// Model that generated the response.
    pub model: String,
    /// One or more completion choices (usually one).
    ///
    /// The type does not forbid an empty list.
    pub choices: Vec<Choice>,
    /// Token usage statistics, if the provider returned them.
    pub usage: Option<Usage>,
    /// Unix timestamp of when the response was created.
    pub created: Option<u64>,
}

impl Response {
    /// Borrow the message text of the first choice.
    ///
    /// Yields `""` when `choices` is empty.
    pub fn content(&self) -> &str {
        match self.choices.first() {
            Some(choice) => choice.message.content.as_str(),
            None => "",
        }
    }

    /// Borrow the token usage statistics, or `None` when none were recorded.
    pub fn usage(&self) -> Option<&Usage> {
        self.usage.as_ref()
    }

    /// Borrow the tool calls attached to the first choice's message.
    ///
    /// Yields `None` when there are no choices or the first message has no
    /// tool calls.
    pub fn tool_calls(&self) -> Option<&Vec<ToolCall>> {
        let first = self.choices.first()?;
        first.message.tool_calls.as_ref()
    }
}

/// A single completion choice within a [`Response`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Choice {
    /// Zero-based index of this choice.
    pub index: u32,
    /// The message generated for this choice.
    pub message: Message,
    /// Reason the model stopped generating, e.g. `"stop"` or `"tool_calls"`.
    ///
    /// Stored as a free-form string and omitted from the serialized form
    /// when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub finish_reason: Option<String>,
}

/// Token usage statistics for a request.
///
/// All three counters are plain stored values; nothing in this type keeps
/// `total_tokens` in sync with the other two.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Usage {
    /// Number of tokens in the prompt.
    pub prompt_tokens: u32,
    /// Number of tokens generated.
    pub completion_tokens: u32,
    /// Total tokens consumed (`prompt_tokens + completion_tokens`).
    pub total_tokens: u32,
}

impl Usage {
    /// Compute what this request cost in USD.
    ///
    /// `prompt_price` and `completion_price` are expressed as USD per 1 000 tokens.
    /// Prompt and completion tokens are priced separately and the two amounts
    /// are summed; `total_tokens` is not consulted.
    pub fn calculate_cost(&self, prompt_price: f64, completion_price: f64) -> f64 {
        // `u32 -> f64` is lossless, so `f64::from` gives the same value as an `as` cast.
        let priced = |tokens: u32, per_thousand: f64| (f64::from(tokens) / 1000.0) * per_thousand;

        priced(self.prompt_tokens, prompt_price) + priced(self.completion_tokens, completion_price)
    }
}

/// A single chunk in a streaming response.
///
/// Chunks are the items of the stream returned by [`Provider::stream`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Chunk {
    /// Provider-assigned response identifier.
    pub id: String,
    /// Model that generated this chunk.
    pub model: String,
    /// The incremental content delta.
    pub delta: Delta,
    /// Set on the final chunk — e.g. `"stop"` or `"tool_calls"`.
    ///
    /// [`Chunk::is_finished`] reports whether this field is `Some`.
    pub finish_reason: Option<String>,
}

impl Chunk {
    /// Borrow the text carried by this chunk's delta.
    pub fn content(&self) -> &str {
        self.delta.content.as_str()
    }

    /// Report whether this chunk ends the stream.
    ///
    /// A chunk counts as terminal exactly when `finish_reason` is set.
    pub fn is_finished(&self) -> bool {
        self.finish_reason.is_some()
    }
}

/// The incremental content delta inside a [`Chunk`].
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct Delta {
    /// Role of the speaker, present only in the first chunk of a response.
    pub role: Option<Role>,
    /// Incremental text content generated since the previous chunk.
    pub content: String,
}

// ─── ProviderConfig ────────────────────────────────────────────────────────

/// Configuration shared by all provider clients.
///
/// `Debug` is implemented by hand so that the API key never shows up in
/// logs, panic messages or other debug output; every other field is printed
/// as usual.
#[derive(Clone)]
pub struct ProviderConfig {
    /// API key used to authenticate requests.
    pub api_key: String,
    /// Override for the provider's default base URL.
    ///
    /// [`ProviderConfig::new`] leaves this as an empty string.
    // NOTE(review): presumably an empty string means "use the provider's
    // default URL" — confirm against the provider clients that read it.
    pub base_url: String,
    /// Request timeout in seconds.
    pub timeout_seconds: u64,
    /// Maximum number of automatic retries on transient errors.
    pub max_retries: u32,
}

impl std::fmt::Debug for ProviderConfig {
    /// Format the configuration with `api_key` replaced by `<redacted>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("ProviderConfig")
            // The key is a secret: print a fixed placeholder, never the value.
            .field("api_key", &format_args!("<redacted>"))
            .field("base_url", &self.base_url)
            .field("timeout_seconds", &self.timeout_seconds)
            .field("max_retries", &self.max_retries)
            .finish()
    }
}

impl ProviderConfig {
    /// Create a minimal config with only an API key.
    ///
    /// The remaining fields start as: empty `base_url`, a 60 second timeout
    /// and 3 retries.
    pub fn new(api_key: impl Into<String>) -> Self {
        Self {
            api_key: api_key.into(),
            base_url: String::new(),
            timeout_seconds: 60,
            max_retries: 3,
        }
    }

    /// Override the default base URL (useful for proxies or local servers).
    #[must_use]
    pub fn with_base_url(mut self, url: impl Into<String>) -> Self {
        self.base_url = url.into();
        self
    }

    /// Set the request timeout in seconds.
    #[must_use]
    pub fn with_timeout(mut self, seconds: u64) -> Self {
        self.timeout_seconds = seconds;
        self
    }

    /// Set the maximum number of automatic retries.
    #[must_use]
    pub fn with_max_retries(mut self, retries: u32) -> Self {
        self.max_retries = retries;
        self
    }
}