//! llama-cpp-v3-agent-sdk 0.1.7
//!
//! Agentic tool-use loop on top of llama-cpp-v3 — local LLM agents with built-in tools.
//! See the crate documentation for usage details.
use crate::tool::{ToolCall, ToolResult};
use serde::{Deserialize, Serialize};

/// A single message in the conversation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Message {
    /// Who produced this message (system, user, assistant, or tool).
    pub role: Role,
    /// The message text. For tool messages this is a human-readable
    /// rendering of the tool output (see `Conversation::add_tool_result`).
    pub content: String,
    /// If this message is an assistant message that contained tool calls,
    /// they are recorded here for context tracking.
    /// Skipped during serialization when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub tool_calls: Vec<ToolCall>,
    /// If this message is a tool result, the originating call is recorded here.
    /// Skipped during serialization when absent.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub tool_result: Option<ToolCallWithResult>,
}

/// The speaker of a [`Message`].
///
/// Serialized in lowercase (`"system"`, `"user"`, `"assistant"`, `"tool"`)
/// to match the wire format expected by chat templates; `Role::as_str`
/// returns the same lowercase names.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    System,
    User,
    Assistant,
    Tool,
}

impl Role {
    pub fn as_str(&self) -> &str {
        match self {
            Role::System => "system",
            Role::User => "user",
            Role::Assistant => "assistant",
            Role::Tool => "tool",
        }
    }
}

/// A tool invocation paired with the result it produced.
///
/// Attached to tool-role messages so the structured call/result data
/// survives alongside the human-readable `content` string.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCallWithResult {
    /// The call the model requested.
    pub call: ToolCall,
    /// The outcome of executing that call.
    pub result: ToolResult,
}

/// Manages the full conversation history.
pub struct Conversation {
    messages: Vec<Message>,
}

impl Conversation {
    pub fn new() -> Self {
        Self {
            messages: Vec::new(),
        }
    }

    /// Create a new conversation with a system prompt.
    pub fn with_system_prompt(system_prompt: &str) -> Self {
        let mut conv = Self::new();
        conv.add_system(system_prompt);
        conv
    }

    /// Add a system message.
    pub fn add_system(&mut self, content: &str) {
        self.messages.push(Message {
            role: Role::System,
            content: content.to_string(),
            tool_calls: vec![],
            tool_result: None,
        });
    }

    /// Add a user message.
    pub fn add_user(&mut self, content: &str) {
        self.messages.push(Message {
            role: Role::User,
            content: content.to_string(),
            tool_calls: vec![],
            tool_result: None,
        });
    }

    /// Add an assistant message (model output).
    pub fn add_assistant(&mut self, content: &str, tool_calls: Vec<ToolCall>) {
        self.messages.push(Message {
            role: Role::Assistant,
            content: content.to_string(),
            tool_calls,
            tool_result: None,
        });
    }

    /// Add a tool result message.
    pub fn add_tool_result(&mut self, call: ToolCall, result: ToolResult) {
        let content = if result.success {
            format!("[Tool: {}] {}", call.name, result.output)
        } else {
            format!("[Tool: {} ERROR] {}", call.name, result.output)
        };
        self.messages.push(Message {
            role: Role::Tool,
            content,
            tool_calls: vec![],
            tool_result: Some(ToolCallWithResult { call, result }),
        });
    }

    /// Convert conversation to `ChatMessage` slice for the llama-cpp-v3 template engine.
    pub fn to_chat_messages(&self) -> Vec<llama_cpp_v3::ChatMessage> {
        self.messages
            .iter()
            .map(|m| llama_cpp_v3::ChatMessage {
                role: m.role.as_str().to_string(),
                content: m.content.clone(),
            })
            .collect()
    }

    /// Get all messages.
    pub fn messages(&self) -> &[Message] {
        &self.messages
    }

    /// Clear all messages.
    pub fn clear(&mut self) {
        self.messages.clear();
    }

    /// Number of messages in the conversation.
    pub fn len(&self) -> usize {
        self.messages.len()
    }

    pub fn is_empty(&self) -> bool {
        self.messages.is_empty()
    }

    /// Compact the conversation by summarizing older messages.
    ///
    /// Keeps the system prompt and the last `keep_recent` messages,
    /// replacing everything in between with a summary message.
    pub fn compact(&mut self, summary: &str, keep_recent: usize) {
        if self.messages.len() <= keep_recent + 1 {
            return; // nothing to compact
        }

        let system_msg = if !self.messages.is_empty() && self.messages[0].role == Role::System {
            Some(self.messages[0].clone())
        } else {
            None
        };

        let total = self.messages.len();
        let start = if system_msg.is_some() { 1 } else { 0 };
        let keep_from = if total > keep_recent {
            total - keep_recent
        } else {
            start
        };

        // Adjust to a safe cut point (never split a tool call from its result)
        let keep_from = self.find_safe_cut_point(keep_from);

        let recent: Vec<Message> = self.messages[keep_from..].to_vec();

        self.messages.clear();

        if let Some(sys) = system_msg {
            self.messages.push(sys);
        }

        // Insert the compacted summary as a system message
        self.messages.push(Message {
            role: Role::System,
            content: format!("[Conversation Summary]\n{}", summary),
            tool_calls: vec![],
            tool_result: None,
        });

        self.messages.extend(recent);
    }

    /// Find a safe cut point at or before `target_idx`.
    ///
    /// A safe cut point is a turn boundary where we don't split an assistant
    /// message from its following tool-result messages. We walk backward from
    /// `target_idx` to find the start of a complete turn.
    pub fn find_safe_cut_point(&self, target_idx: usize) -> usize {
        let start = if !self.messages.is_empty() && self.messages[0].role == Role::System {
            1
        } else {
            0
        };

        if target_idx <= start {
            return start;
        }

        let mut idx = target_idx.min(self.messages.len());

        // Walk backward to find a point that's NOT in the middle of a
        // tool-call → tool-result pair.
        while idx > start {
            let msg = &self.messages[idx.saturating_sub(1)];
            // If the message just before the cut is a Tool result, keep going
            // back to include the assistant message that triggered it.
            if msg.role == Role::Tool {
                idx -= 1;
            } else if msg.role == Role::Assistant && !msg.tool_calls.is_empty() {
                // The assistant has tool calls — we need to include the
                // tool results that follow it, so cut before this message.
                idx -= 1;
                if idx <= start {
                    break;
                }
            } else {
                break;
            }
        }

        idx.max(start)
    }

    /// Serialize messages in a range to a human-readable string for
    /// summarization by the model.
    pub fn serialize_range(&self, from: usize, to: usize) -> String {
        let mut lines = Vec::new();
        for msg in &self.messages[from..to] {
            let role = match msg.role {
                Role::System => "System",
                Role::User => "User",
                Role::Assistant => "Assistant",
                Role::Tool => "Tool",
            };
            lines.push(format!("[{}]: {}", role, msg.content));
        }
        lines.join("\n\n")
    }

    /// Count of messages that would be compacted (everything between system
    /// prompt and the last `keep_recent` messages).
    pub fn compactable_count(&self, keep_recent: usize) -> usize {
        let start = if !self.messages.is_empty() && self.messages[0].role == Role::System {
            1
        } else {
            0
        };
        let total = self.messages.len();
        if total <= keep_recent + start {
            0
        } else {
            total - keep_recent - start
        }
    }
}

impl Default for Conversation {
    fn default() -> Self {
        Self::new()
    }
}