// llmkit-core 0.1.0

//! Streaming response types.

use std::pin::Pin;

use futures_core::Stream;

use crate::error::LlmResult;
use crate::usage::TokenUsage;

/// A boxed async stream of [`StreamDelta`] items, the unified streaming output
/// across every provider.
///
/// The alias is a pinned, heap-allocated trait object, so any concrete
/// [`Stream`] implementation can be returned behind this single type. Each
/// yielded item is an `LlmResult<StreamDelta>`, using the result type from
/// `crate::error`. The `Send` bound allows the boxed stream to be moved to
/// another thread.
pub type ChatStream = Pin<Box<dyn Stream<Item = LlmResult<StreamDelta>> + Send>>;

/// One incremental event in a streaming chat response.
///
/// This is the item type carried by [`ChatStream`]. An event is either a
/// piece of generated text, a fragment of a tool call, or the terminal event
/// that carries token usage.
#[derive(Debug, Clone, PartialEq)]
pub enum StreamDelta {
    /// A chunk of generated text.
    Text(String),
    /// A streamed tool call (arguments may arrive incrementally).
    ///
    /// `id` and `name` are optional because a given delta may not carry them.
    // NOTE(review): presumably consumers accumulate `input_delta` fragments
    // per tool call until the stream finishes — confirm against the provider
    // adapters, which are not visible from this file.
    ToolCall {
        /// Provider-assigned tool call id, if known yet.
        id: Option<String>,
        /// Tool name, if known yet.
        name: Option<String>,
        /// Partial JSON arguments for this delta.
        ///
        /// This is a fragment of the argument text, so it should not be
        /// assumed to parse as JSON on its own.
        input_delta: String,
    },
    /// Terminal event carrying final usage.
    Done {
        /// Final token usage.
        usage: TokenUsage,
    },
}