// vespe 0.1.2 - Docs.rs

//! This module defines the data structures used to represent content exchanged with
//! external models (LLMs). It provides a structured way to build prompts, distinguishing
//! between system instructions, user input, and agent responses. The core components
//! are `ModelContentItem`, representing a single part of a conversation, and
//! `ModelContent`, which aggregates multiple `ModelContentItem`s into a complete prompt.

use serde::{Deserialize, Serialize};

/// Represents content originating from the system.
///
/// This struct encapsulates text that provides instructions or context from the
/// system to the model. It's typically used for setting up the model's behavior
/// or providing high-level guidance.
///
/// Within this module, values are built by [`ModelContentItem::system`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SystemModelContent {
    /// Raw text of the system message; prompt headers are added only when the
    /// item is formatted into a prompt.
    text: String,
}

/// Represents content originating from the user's prompt or context files.
///
/// This struct holds text input provided directly by the user or gathered from
/// user-defined context files. It forms the primary input for the model's responses.
///
/// Within this module, values are built by [`ModelContentItem::user`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UserModelContent {
    /// Raw text of the user message; prompt headers are added only when the
    /// item is formatted into a prompt.
    text: String,
}

/// Represents content generated by an agent or a previous model turn.
///
/// This struct stores text that has been generated by an AI agent or a prior
/// interaction with an external model. It allows for multi-turn conversations
/// and contextual responses.
///
/// Within this module, values are built by [`ModelContentItem::agent`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AgentModelContent {
    /// Optional identifier of the agent that produced the text. When prompt
    /// formatting has agent names enabled, it is passed to
    /// `names::generate_name` to build the name shown in the "Assistant" header.
    author: Option<String>,
    /// Raw text of the agent message, without any header.
    text: String,
}

/// A single piece of content in a conversation or prompt sent to a model.
///
/// The first three variants wrap a role-specific content type
/// ([`SystemModelContent`], [`UserModelContent`], [`AgentModelContent`]) so that
/// prompt construction can give each part the appropriate header. The two
/// `Merge*` variants carry bare text that is folded into a neighbouring message
/// while the prompt is being built; they are never formatted on their own.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ModelContentItem {
    /// Content provided by the system, typically for instructions or context.
    System(SystemModelContent),
    /// Content provided by the user, representing direct input or queries.
    User(UserModelContent),
    /// Content generated by an AI agent or a previous model response.
    Agent(AgentModelContent),
    /// Text to merge into the next message as a prefix.
    ///
    /// During prompt construction the text is placed, followed by a newline,
    /// in front of the next system, user, or agent message.
    MergeDownstream(String),
    /// Text to merge into the previous message as a postfix.
    ///
    /// During prompt construction the text is appended, after a newline, to the
    /// previous system, user, or agent message; it is ignored when there is no
    /// previous message.
    MergeUpstream(String),
}

impl ModelContentItem {
    /// Builds a [`ModelContentItem::User`] holding an owned copy of `text`.
    ///
    /// # Arguments
    ///
    /// * `text` - The text content for the user message.
    ///
    /// # Returns
    ///
    /// A `ModelContentItem::User` wrapping the given text.
    pub fn user(text: &str) -> Self {
        Self::User(UserModelContent {
            text: text.to_owned(),
        })
    }

    /// Builds a [`ModelContentItem::System`] holding an owned copy of `text`.
    ///
    /// # Arguments
    ///
    /// * `text` - The text content for the system message.
    ///
    /// # Returns
    ///
    /// A `ModelContentItem::System` wrapping the given text.
    pub fn system(text: &str) -> Self {
        Self::System(SystemModelContent {
            text: text.to_owned(),
        })
    }

    /// Builds a [`ModelContentItem::Agent`] from an optional author and a text.
    ///
    /// # Arguments
    ///
    /// * `author` - The author of the agent message, if known.
    /// * `text` - The text content for the agent's message.
    ///
    /// # Returns
    ///
    /// A `ModelContentItem::Agent` wrapping the author and an owned copy of the text.
    pub fn agent(author: Option<String>, text: &str) -> Self {
        Self::Agent(AgentModelContent {
            author,
            text: text.to_owned(),
        })
    }

    /// Builds a [`ModelContentItem::MergeDownstream`] from a string slice.
    ///
    /// The content of this variant is prepended to the next non-merge
    /// `ModelContentItem` during prompt construction.
    ///
    /// # Arguments
    ///
    /// * `text` - The text content to be merged downstream.
    ///
    /// # Returns
    ///
    /// A `ModelContentItem::MergeDownstream` variant.
    pub fn merge_downstream(text: &str) -> Self {
        Self::MergeDownstream(text.to_owned())
    }

    /// Builds a [`ModelContentItem::MergeUpstream`] from a string slice.
    ///
    /// The content of this variant is appended to the previous non-merge
    /// `ModelContentItem` during prompt construction.
    ///
    /// # Arguments
    ///
    /// * `text` - The text content to be merged upstream.
    ///
    /// # Returns
    ///
    /// A `ModelContentItem::MergeUpstream` variant.
    pub fn merge_upstream(text: &str) -> Self {
        Self::MergeUpstream(text.to_owned())
    }

    /// Checks whether the item's text is empty or contains only whitespace.
    ///
    /// This is used to filter out empty messages during prompt construction.
    ///
    /// # Returns
    ///
    /// `true` if the item's text content is empty or only whitespace, `false` otherwise.
    pub fn is_empty(&self) -> bool {
        // Borrow the text directly: building an owned `String` through
        // `to_string()` just to test for blankness would allocate on every call.
        let text = match self {
            Self::System(content) => content.text.as_str(),
            Self::User(content) => content.text.as_str(),
            Self::Agent(content) => content.text.as_str(),
            Self::MergeDownstream(text) | Self::MergeUpstream(text) => text.as_str(),
        };
        text.trim().is_empty()
    }
}

impl ToString for ModelContentItem {
    /// Converts the `ModelContentItem` into its raw text content.
    ///
    /// This implementation of `ToString` simply returns the underlying text
    /// of the content item, without any additional formatting or headers.
    ///
    /// # Returns
    ///
    /// A `String` containing the raw text content.
    fn to_string(&self) -> String {
        match self {
            ModelContentItem::System(content) => format!("{}", content.text),
            ModelContentItem::User(content) => format!("{}", content.text),
            ModelContentItem::Agent(content) => format!("{}", content.text),
            ModelContentItem::MergeDownstream(content) => content.clone(),
            ModelContentItem::MergeUpstream(content) => content.clone(),
        }
    }
}

/// Defines the various formats in which a prompt can be structured for an external model.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PromptFormat {
    /// Each `ModelContentItem` is rendered as a distinct "part" introduced by a
    /// role header (e.g., "System:", "User:", "Assistant:").
    Parts,
}

/// Configuration for how the `ModelContent` should be formatted into a final prompt string.
#[derive(Debug, Clone)]
pub struct PromptConfig {
    /// Optional name of the agent the prompt is built for. It is only used when
    /// `with_agent_names` is true, in which case it is passed to
    /// `names::generate_name` and the result appears in the identity line and
    /// in the invitation.
    pub agent: Option<String>,
    /// The desired format for the prompt (e.g., `PromptFormat::Parts`).
    pub format: PromptFormat,
    /// Whether agent names should be included in the formatted prompt.
    pub with_agent_names: bool,
    /// Whether an invitation for the assistant to respond should be appended
    /// to the prompt (e.g., "Assistant:").
    pub with_invitation: bool,
}

/// A struct representing a full conversation or a multi-part prompt.
///
/// `ModelContent` is an ordered collection of `ModelContentItem`s that together
/// form a complete prompt that can be sent to an external model. The wrapped
/// vector is public, so items can also be inspected or edited directly through
/// field `0`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelContent(pub Vec<ModelContentItem>);

impl ModelContent {
    /// Creates a new, empty `ModelContent` instance.
    ///
    /// # Returns
    ///
    /// A new `ModelContent` with an empty list of items.
    pub fn new() -> Self {
        ModelContent(Vec::new())
    }

    /// Creates a new `ModelContent` instance containing a single `ModelContentItem`.
    ///
    /// # Arguments
    ///
    /// * `item` - The `ModelContentItem` to be wrapped in a new `ModelContent`.
    ///
    /// # Returns
    ///
    /// A new `ModelContent` containing the provided item.
    pub fn from_item(item: ModelContentItem) -> Self {
        ModelContent(vec![item])
    }

    /// Extends the current `ModelContent` with items from another `ModelContent` instance.
    ///
    /// The items from the `content` parameter are appended to the end of the
    /// current `ModelContent`'s list of items.
    ///
    /// # Arguments
    ///
    /// * `content` - Another `ModelContent` instance whose items will be appended.
    pub fn extend(&mut self, content: ModelContent) {
        self.0.extend(content.0.into_iter());
    }

    /// Appends a single `ModelContentItem` to the end of the `ModelContent`.
    ///
    /// # Arguments
    ///
    /// * `item` - The `ModelContentItem` to be added.
    pub fn push(&mut self, item: ModelContentItem) {
        self.0.push(item);
    }

    /// Formats a single `ModelContentItem` into a prompt string using the `PromptFormat::Parts` style.
    ///
    /// This internal helper method applies specific formatting rules for System, User, and Agent
    /// content, including adding headers and handling agent names based on the provided `PromptConfig`.
    ///
    /// # Arguments
    ///
    /// * `item` - The `ModelContentItem` to format.
    /// * `config` - The `PromptConfig` specifying formatting options.
    ///
    /// # Returns
    ///
    /// A `String` representing the formatted content item.
    ///
    /// # Panics
    ///
    /// This method will panic if it encounters a `MergeDownstream` or `MergeUpstream` item,
    /// as these should have been handled in a prior merging pass.
    fn embed_in_prompt_as_part(item: &ModelContentItem, config: &PromptConfig) -> String {
        match item {
            ModelContentItem::System(content) => {
                let text = content.text.trim();
                if !text.is_empty() {
                    format!("---\nSystem:\n{}\n", text)
                } else {
                    text.into()
                }
            }
            ModelContentItem::User(content) => {
                let text = content.text.trim();
                if !text.is_empty() {
                    format!("---\nUser:\n{}\n", text)
                } else {
                    text.into()
                }
            }
            ModelContentItem::Agent(content) => {
                let text = content.text.trim();
                if !text.is_empty() {
                    let name = match config.with_agent_names {
                        false => None,
                        true => content
                            .author
                            .clone()
                            .map(|x| super::names::generate_name(&x)),
                    };
                    if let Some(name) = name {
                        format!("Assistant {}:\n{}\n", name, text)
                    } else {
                        format!("Assistant:\n{}\n", text)
                    }
                } else {
                    text.into()
                }
            }
            _ => {
                panic!("cannot be embedded, bug!");
            }
        }
    }

    /// Dispatches to the appropriate embedding function based on the `PromptFormat` specified in the config.
    ///
    /// This method acts as a router for different prompt formatting strategies.
    ///
    /// # Arguments
    ///
    /// * `item` - The `ModelContentItem` to embed.
    /// * `config` - The `PromptConfig` specifying the desired format.
    ///
    /// # Returns
    ///
    /// A `String` representing the embedded content item.
    fn embed_in_prompt(item: &ModelContentItem, config: &PromptConfig) -> String {
        match config.format {
            PromptFormat::Parts => Self::embed_in_prompt_as_part(item, config),
        }
    }

    /// Converts the entire `ModelContent` into a single formatted prompt string.
    ///
    /// This method iterates through all `ModelContentItem`s and formats each one
    /// using `ModelContentItem::to_prompt()`, then joins them with newline characters.
    /// The result is a complete prompt ready for an external model.
    ///
    /// # Returns
    ///
    /// A `String` representing the concatenated and formatted prompt.
    pub fn to_prompt(&self, config: &PromptConfig) -> String {
        let agent_name = match config.with_agent_names {
            false => None,
            true => config
                .agent
                .clone()
                .map(|x| super::names::generate_name(&x)),
        };

        let identity = agent_name
            .clone()
            .map(|x| format!("You are {}.\n", x))
            .unwrap_or(String::new());

        // Pre pass: prepend identity message
        let mut prepend_identity = vec![ModelContentItem::merge_downstream(&identity)];
        prepend_identity.extend(self.0.clone());

        // Pre pass: delete empty messages
        let non_empty_items = &prepend_identity
            .iter()
            .filter(|x| !x.is_empty())
            .collect::<Vec<&ModelContentItem>>();

        // First pass: merge downstream and upstream messages
        let mut merged_items: Vec<ModelContentItem> = Vec::new();
        let mut downstream_merges: Vec<String> = Vec::new();

        for item in non_empty_items {
            match item {
                ModelContentItem::System(_) | ModelContentItem::User(_) => {
                    let mut current_text = item.to_string();

                    if !downstream_merges.is_empty() {
                        let prefix = downstream_merges.join("\n");
                        current_text = format!("{}\n{}", prefix, current_text);
                        downstream_merges.clear();
                    }

                    let new_item = match item {
                        ModelContentItem::System(_) => ModelContentItem::system(&current_text),
                        ModelContentItem::User(_) => ModelContentItem::user(&current_text),
                        _ => unreachable!(), // We are in the match arm for these types
                    };
                    merged_items.push(new_item);
                }
                ModelContentItem::Agent(agent_item) => {
                    let mut current_text = item.to_string();

                    if !downstream_merges.is_empty() {
                        let prefix = downstream_merges.join("\n");
                        current_text = format!("{}\n{}", prefix, current_text);
                        downstream_merges.clear();
                    }

                    let new_item =
                        ModelContentItem::agent(agent_item.author.clone(), &current_text);

                    merged_items.push(new_item);
                }
                ModelContentItem::MergeDownstream(s) => {
                    downstream_merges.push(s.clone());
                }
                ModelContentItem::MergeUpstream(s) => {
                    if let Some(last_item) = merged_items.last_mut() {
                        let text_to_append = s.clone();
                        match last_item {
                            ModelContentItem::System(c) => {
                                c.text = format!("{}\n{}", c.text, text_to_append)
                            }
                            ModelContentItem::User(c) => {
                                c.text = format!("{}\n{}", c.text, text_to_append)
                            }
                            ModelContentItem::Agent(c) => {
                                c.text = format!("{}\n{}", c.text, text_to_append)
                            }
                            _ => {} // Should not happen as we only push normal items
                        }
                    }
                    // If there's no previous item, we ignore the upstream merge.
                }
            }
        }

        // Second pass: merge consecutive messages of the same type
        let mut final_merged_items: Vec<ModelContentItem> = Vec::new();
        let mut iter = merged_items.into_iter();

        if let Some(mut last) = iter.next() {
            for item in iter {
                match (&mut last, &item) {
                    (
                        ModelContentItem::System(last_content),
                        ModelContentItem::System(item_content),
                    ) => {
                        last_content.text.push('\n');
                        last_content.text.push_str(&item_content.text);
                    }
                    (
                        ModelContentItem::User(last_content),
                        ModelContentItem::User(item_content),
                    ) => {
                        last_content.text.push('\n');
                        last_content.text.push_str(&item_content.text);
                    }
                    (
                        ModelContentItem::Agent(last_content),
                        ModelContentItem::Agent(item_content),
                    ) => {
                        last_content.text.push('\n');
                        last_content.text.push_str(&item_content.text);
                    }
                    _ => {
                        final_merged_items.push(last);
                        last = item;
                    }
                }
            }
            final_merged_items.push(last);
        }

        let mut prompt = final_merged_items
            .iter()
            .map(|item| Self::embed_in_prompt(item, config))
            .collect::<Vec<String>>()
            .join("\n");

        let invitation = match (config.with_invitation, agent_name) {
            (true, Some(agent_name)) => format!("Assistant {}:", agent_name),
            (true, None) => format!("Assistant:"),
            (false, _) => String::new(),
        };

        prompt.push_str(&invitation);

        /*
        Aggiungere questo con gemini e' peggiorativo per multi-persona (example 01), vedendo
        di nuovo "Assistant" prende di nuovo la personalita' del pagliaccio e non del corvo.
        prompt.push_str("Assistant:\n"); // TODO rendere dipendente dal tipo di formattazione?
        */

        prompt
    }
}

impl Default for ModelContent {
    /// Returns a default, empty `ModelContent` instance.
    ///
    /// This is equivalent to calling `ModelContent::new()`.
    fn default() -> Self {
        Self::new()
    }
}

impl std::fmt::Display for ModelContent {
    /// Writes the raw text of every item, separated by newlines.
    ///
    /// The raw text content of all `ModelContentItem`s is concatenated in order
    /// with a single `\n` between consecutive items, without any additional
    /// formatting or message type headers. `Display` is implemented rather than
    /// `ToString` so that the value works with `format!`/`write!` as well;
    /// `to_string()` remains available through the standard blanket
    /// implementation and returns the same text as before.
    ///
    /// # Arguments
    ///
    /// * `f` - The formatter receiving the text.
    ///
    /// # Returns
    ///
    /// The result of writing the text to `f`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        for (index, item) in self.0.iter().enumerate() {
            if index > 0 {
                f.write_str("\n")?;
            }
            f.write_str(&item.to_string())?;
        }
        Ok(())
    }
}