// klieo-core 0.4.0

//! LLM client trait and request/response types.
//!
//! Providers implement [`LlmClient`]. The runtime never depends on a
//! specific provider crate — selection is explicit at app startup.

use crate::error::LlmError;
use async_trait::async_trait;
use futures_core::Stream;
use serde::{Deserialize, Serialize};
use std::pin::Pin;
use std::time::Duration;

/// One message in a chat conversation.
///
/// Serializable and deserializable through serde. When deserializing,
/// `tool_calls` and `tool_call_id` may be missing from the input; they
/// then fall back to an empty list and `None` respectively.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Message {
    /// Speaker role.
    pub role: Role,
    /// Body text. May be empty when `tool_calls` carries the payload.
    pub content: String,
    /// Tool calls the assistant requested in this message.
    ///
    /// Empty when the field is absent from the serialized form.
    #[serde(default)]
    pub tool_calls: Vec<ToolCall>,
    /// When this message answers a tool call, the id of that call
    /// (see [`ToolCall::id`]).
    ///
    /// `None` when the field is absent from the serialized form.
    #[serde(default)]
    pub tool_call_id: Option<String>,
}

/// Speaker role.
///
/// Serialized by serde with lowercase variant names
/// (`system`, `user`, `assistant`, `tool`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    /// System prompt.
    System,
    /// User-supplied input.
    User,
    /// Model output.
    Assistant,
    /// Tool call result fed back into the conversation.
    Tool,
}

/// One tool call requested by the assistant.
///
/// Carried in [`Message::tool_calls`] and in [`ChatChunk::tool_calls`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCall {
    /// Provider-stable id; echoed back in the matching tool response
    /// (see [`Message::tool_call_id`]).
    pub id: String,
    /// Tool name.
    pub name: String,
    /// JSON arguments, held as an untyped JSON value. This type itself
    /// performs no validation of the value.
    pub args: serde_json::Value,
}

/// Tool catalogue entry shown to the LLM.
///
/// Passed to the provider through [`ChatRequest::tools`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolDef {
    /// Tool name (must be unique within the catalogue).
    pub name: String,
    /// Human-readable description for the LLM.
    pub description: String,
    /// JSON-schema for arguments, held as an untyped JSON value.
    pub json_schema: serde_json::Value,
}

/// Chat completion request.
///
/// Input to [`LlmClient::complete`] and [`LlmClient::stream`]. Use
/// [`ChatRequest::new`] to start from a request that carries only
/// messages, then set the public fields as needed.
#[derive(Debug, Clone)]
pub struct ChatRequest {
    /// Conversation history.
    pub messages: Vec<Message>,
    /// Tools available for the model to call. Empty means no tools are offered.
    pub tools: Vec<ToolDef>,
    /// Sampling temperature. `None` means the caller specified no value.
    pub temperature: Option<f32>,
    /// Maximum response tokens. `None` means the caller specified no limit.
    pub max_tokens: Option<u32>,
    /// Output format hint.
    pub response_format: ResponseFormat,
    /// Stop sequences. Empty means none were supplied.
    pub stop: Vec<String>,
    /// Per-request deadline. Provider should abort if exceeded.
    /// `None` means the caller specified no deadline.
    pub timeout: Option<Duration>,
}

impl ChatRequest {
    /// Build a request with the supplied messages and no tools.
    pub fn new(messages: Vec<Message>) -> Self {
        Self {
            messages,
            tools: Vec::new(),
            temperature: None,
            max_tokens: None,
            response_format: ResponseFormat::Text,
            stop: Vec::new(),
            timeout: None,
        }
    }
}

/// Output format hint passed to the provider.
///
/// Set on [`ChatRequest::response_format`]; [`ChatRequest::new`] uses
/// [`ResponseFormat::Text`].
#[derive(Debug, Clone)]
pub enum ResponseFormat {
    /// Plain text.
    Text,
    /// Best-effort JSON output.
    Json {
        /// Schema describing expected JSON shape, as an untyped JSON value.
        schema: serde_json::Value,
    },
    /// Strict structured output validated against the schema.
    StructuredOutput {
        /// Schema describing expected shape, as an untyped JSON value.
        schema: serde_json::Value,
    },
}

/// Chat completion response.
///
/// Success value of [`LlmClient::complete`].
#[derive(Debug, Clone)]
pub struct ChatResponse {
    /// Assistant message returned by the provider.
    pub message: Message,
    /// Token usage.
    pub usage: Usage,
    /// Why the provider stopped generating.
    pub finish_reason: FinishReason,
}

/// Token usage report.
///
/// The derived `Default` yields zero for both counters.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Usage {
    /// Prompt tokens.
    pub prompt_tokens: u32,
    /// Completion tokens.
    pub completion_tokens: u32,
}

/// Reason the provider stopped generating.
///
/// Unlike [`Role`], no serde rename is applied here, so the serialized
/// form uses the variant names exactly as written (e.g. `ToolCalls`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum FinishReason {
    /// Model emitted a stop token or end-of-turn.
    Stop,
    /// Model emitted tool calls; runtime must dispatch them.
    ToolCalls,
    /// Hit `max_tokens`.
    Length,
    /// Provider applied a content filter.
    ContentFilter,
    /// Provider error mid-stream.
    Error,
}

/// Provider capability declaration.
///
/// The derived `Default` reports no capabilities at all: every flag is
/// `false` and `max_context_tokens` is `0`. Two values compare equal
/// when all of their fields are equal.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Capabilities {
    /// Supports tool calls.
    pub tool_calling: bool,
    /// Supports streaming responses.
    pub streaming: bool,
    /// Supports schema-validated structured output.
    pub structured_output: bool,
    /// Supports embeddings.
    pub embeddings: bool,
    /// Maximum context window in tokens.
    pub max_context_tokens: u32,
    /// Supports vision input. (Not used in foundation MVP.)
    pub vision: bool,
}

impl Capabilities {
    /// Start a fluent builder. Equivalent to `Capabilities::default()`
    /// followed by chained setters.
    ///
    /// ```
    /// use klieo_core::Capabilities;
    /// let caps = Capabilities::builder()
    ///     .tool_calling(true)
    ///     .streaming(true)
    ///     .max_context_tokens(8000)
    ///     .build();
    /// assert!(caps.tool_calling);
    /// assert!(caps.streaming);
    /// assert_eq!(caps.max_context_tokens, 8000);
    /// ```
    pub fn builder() -> CapabilitiesBuilder {
        CapabilitiesBuilder(Capabilities::default())
    }
}

/// Fluent builder for [`Capabilities`]. Build via [`Capabilities::builder`].
///
/// Every setter consumes the builder and returns the updated one, so a
/// builder value that is dropped without calling [`build`](Self::build)
/// has no effect; the `must_use` marker makes the compiler warn about that.
#[derive(Debug, Clone, Default)]
#[must_use = "a builder does nothing until `build` is called"]
pub struct CapabilitiesBuilder(Capabilities);

impl CapabilitiesBuilder {
    /// Set the `tool_calling` flag.
    pub fn tool_calling(mut self, v: bool) -> Self {
        self.0.tool_calling = v;
        self
    }
    /// Set the `streaming` flag.
    pub fn streaming(mut self, v: bool) -> Self {
        self.0.streaming = v;
        self
    }
    /// Set the `structured_output` flag.
    pub fn structured_output(mut self, v: bool) -> Self {
        self.0.structured_output = v;
        self
    }
    /// Set the `embeddings` flag.
    pub fn embeddings(mut self, v: bool) -> Self {
        self.0.embeddings = v;
        self
    }
    /// Set the maximum context window in tokens.
    pub fn max_context_tokens(mut self, v: u32) -> Self {
        self.0.max_context_tokens = v;
        self
    }
    /// Set the `vision` flag.
    pub fn vision(mut self, v: bool) -> Self {
        self.0.vision = v;
        self
    }
    /// Consume the builder and return the configured [`Capabilities`].
    #[must_use]
    pub fn build(self) -> Capabilities {
        self.0
    }
}

/// One chunk of a streaming response.
///
/// Item type (inside a `Result`) of [`ChunkStream`].
#[derive(Debug, Clone)]
pub struct ChatChunk {
    /// Incremental content delta.
    pub delta: String,
    /// Tool calls emitted in this chunk (rare; usually only at end).
    pub tool_calls: Vec<ToolCall>,
    /// `Some` once the provider signals completion; `None` otherwise.
    pub finish_reason: Option<FinishReason>,
}

/// Streaming response handle.
///
/// A pinned, boxed trait object, so providers can return any concrete
/// stream type. Each item is either a [`ChatChunk`] or an [`LlmError`].
/// The `Send + 'static` bounds mean the stream can be moved to another
/// thread and holds no non-`'static` borrows.
pub type ChunkStream = Pin<Box<dyn Stream<Item = Result<ChatChunk, LlmError>> + Send + 'static>>;

/// Vector embedding for one input text.
///
/// A plain `Vec<f32>`; the vector length is not fixed by this type.
pub type Embedding = Vec<f32>;

/// LLM provider trait.
///
/// Implementors live in their own crates (`klieo-llm-ollama`, etc.).
/// `Capabilities` are inspected by the runtime before it sends an
/// unsupported request.
///
/// The `Send + Sync` supertraits require every implementor to be
/// shareable across threads. The async methods are declared through
/// `#[async_trait]`.
///
/// ```
/// # tokio_test::block_on(async {
/// use klieo_core::test_utils::{FakeLlmClient, FakeLlmStep};
/// use klieo_core::{ChatRequest, FinishReason, LlmClient};
/// let llm = FakeLlmClient::new("fake")
///     .with_steps(vec![FakeLlmStep::Text("hello".into())]);
/// assert_eq!(llm.name(), "fake");
/// assert!(llm.capabilities().tool_calling);
/// let resp = llm.complete(ChatRequest::new(vec![])).await.unwrap();
/// assert_eq!(resp.message.content, "hello");
/// assert_eq!(resp.finish_reason, FinishReason::Stop);
/// # });
/// ```
#[async_trait]
pub trait LlmClient: Send + Sync {
    /// Stable identifier for this client (e.g. `"ollama:qwen2.5:14b"`).
    fn name(&self) -> &str;

    /// Capabilities declared by this client.
    ///
    /// Returned by reference, borrowed from the client.
    fn capabilities(&self) -> &Capabilities;

    /// One-shot completion.
    ///
    /// Takes the request by value and resolves to the full
    /// [`ChatResponse`].
    ///
    /// # Errors
    ///
    /// Returns an [`LlmError`] when the provider cannot produce a response.
    ///
    /// ```
    /// # tokio_test::block_on(async {
    /// use klieo_core::test_utils::{FakeLlmClient, FakeLlmStep};
    /// use klieo_core::{ChatRequest, FinishReason, LlmClient};
    /// let llm = FakeLlmClient::new("fake")
    ///     .with_steps(vec![FakeLlmStep::Text("hello".into())]);
    /// let resp = llm.complete(ChatRequest::new(vec![])).await.unwrap();
    /// assert_eq!(resp.message.content, "hello");
    /// assert_eq!(resp.finish_reason, FinishReason::Stop);
    /// # });
    /// ```
    async fn complete(&self, req: ChatRequest) -> Result<ChatResponse, LlmError>;

    /// Streaming completion.
    ///
    /// On success yields a [`ChunkStream`] whose items are individual
    /// [`ChatChunk`]s or per-chunk errors.
    ///
    /// # Errors
    ///
    /// Returns an [`LlmError`] when the stream cannot be opened. As the
    /// example shows, a client may answer with `LlmError::Unsupported`.
    ///
    /// ```
    /// # tokio_test::block_on(async {
    /// use klieo_core::test_utils::FakeLlmClient;
    /// use klieo_core::{ChatRequest, LlmClient, LlmError};
    /// let llm = FakeLlmClient::new("fake");
    /// match llm.stream(ChatRequest::new(vec![])).await {
    ///     Ok(_) => panic!("expected Unsupported"),
    ///     Err(e) => assert!(matches!(e, LlmError::Unsupported(_))),
    /// }
    /// # });
    /// ```
    async fn stream(&self, req: ChatRequest) -> Result<ChunkStream, LlmError>;

    /// Compute embeddings for the supplied texts.
    ///
    /// # Errors
    ///
    /// Returns an [`LlmError`] when embeddings cannot be computed. As the
    /// example shows, a client may answer with `LlmError::Unsupported`.
    ///
    /// ```
    /// # tokio_test::block_on(async {
    /// use klieo_core::test_utils::FakeLlmClient;
    /// use klieo_core::{LlmClient, LlmError};
    /// let llm = FakeLlmClient::new("fake");
    /// let err = llm.embed(&["hello".into()]).await.unwrap_err();
    /// assert!(matches!(err, LlmError::Unsupported(_)));
    /// # });
    /// ```
    // NOTE(review): presumably the result holds one embedding per input
    // text, in input order — the signature does not enforce it; confirm
    // against implementors.
    async fn embed(&self, texts: &[String]) -> Result<Vec<Embedding>, LlmError>;
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Compile-time check that LlmClient is dyn-compatible.
    // Never called: the signature alone forces the compiler to accept
    // `dyn LlmClient`, hence the dead-code allowance.
    #[allow(dead_code)]
    fn _assert_dyn_compatible(_: &dyn LlmClient) {}

    #[test]
    fn chat_request_default_has_no_tools() {
        let ChatRequest {
            tools,
            response_format,
            ..
        } = ChatRequest::new(Vec::new());
        assert!(tools.is_empty());
        assert!(matches!(response_format, ResponseFormat::Text));
    }

    #[test]
    fn capabilities_builder_default_matches_struct_default() {
        // Snapshot every field as a tuple so both values are compared in
        // one assertion without relying on a `PartialEq` impl.
        let fields = |c: &Capabilities| {
            (
                c.tool_calling,
                c.streaming,
                c.structured_output,
                c.embeddings,
                c.max_context_tokens,
                c.vision,
            )
        };
        let from_builder = Capabilities::builder().build();
        let from_default = Capabilities::default();
        assert_eq!(fields(&from_builder), fields(&from_default));
    }

    #[test]
    fn capabilities_builder_sets_tool_calling() {
        assert!(Capabilities::builder().tool_calling(true).build().tool_calling);
    }

    #[test]
    fn capabilities_builder_sets_streaming() {
        assert!(Capabilities::builder().streaming(true).build().streaming);
    }

    #[test]
    fn capabilities_builder_sets_structured_output() {
        assert!(
            Capabilities::builder()
                .structured_output(true)
                .build()
                .structured_output
        );
    }

    #[test]
    fn capabilities_builder_sets_embeddings() {
        assert!(Capabilities::builder().embeddings(true).build().embeddings);
    }

    #[test]
    fn capabilities_builder_sets_max_context_tokens() {
        let Capabilities {
            max_context_tokens, ..
        } = Capabilities::builder().max_context_tokens(8000).build();
        assert_eq!(max_context_tokens, 8000);
    }

    #[test]
    fn capabilities_builder_sets_vision() {
        assert!(Capabilities::builder().vision(true).build().vision);
    }

    #[test]
    fn capabilities_builder_chains_all_setters() {
        let caps = Capabilities::builder()
            .tool_calling(true)
            .streaming(true)
            .structured_output(true)
            .embeddings(true)
            .max_context_tokens(32_000)
            .vision(false)
            .build();
        assert!(caps.tool_calling);
        assert!(caps.streaming);
        assert!(caps.structured_output);
        assert!(caps.embeddings);
        assert_eq!(caps.max_context_tokens, 32_000);
        assert!(!caps.vision);
    }
}