// poe2-agent 0.5.0

//! ReAct-style tool-calling agent for build analysis.
//!
//! Uses the OpenAI Responses API for function calling to query PoB data
//! on demand rather than dumping everything into the system prompt upfront.

use std::sync::Arc;

use futures_core::Stream;

use crate::llm::{
    input_function_call_output, input_message, ChatGptClient, LlmError, ResponseStreamEvent, Usage,
};
use crate::pob_parser::PobParser;
use crate::tools::{BuildMutation, ToolContext, ToolRegistry};
use crate::trace::{AgentTrace, TraceBuilder, TraceMessage};
use crate::trade::TradeClient;

/// Upper bound on the number of LLM request rounds a single
/// `ToolAgent::respond` call will run. Each round streams one model
/// response and then executes whatever tool calls that response requested.
const MAX_TOOL_ROUNDS: usize = 10;

/// Instruction text passed as the `instructions` argument when
/// `ToolAgent::respond` calls the LLM.
///
/// The literal uses trailing `\` line continuations, which drop the source
/// newline and the next line's leading whitespace; the only line breaks the
/// model sees are the explicit `\n` escapes.
const SYSTEM_PROMPT: &str = "\
You are a Path of Exile 2 build analysis assistant. The user has uploaded \
their Path of Building export.\n\
\n\
You have tools to inspect the build data. Use them to answer the user's \
questions accurately — do NOT guess at numbers.\n\
\n\
Start by calling get_build_stats to get an overview of the build's offense, \
defense, and resources. Then use get_skill_list or get_config if needed \
to answer the user's specific question.\n\
\n\
Use get_equipped_items to see all gear across every slot in one call — names, \
bases, rarity, and mod lines for filled slots, plus empty slot markers and \
socketed jewels. Use this for broad gear questions before diving into specifics.\n\
\n\
Use get_item to inspect a specific equipment slot when the user asks about \
their gear, an item's mods, or how a particular slot could be upgraded. \
Do not call get_item unless the question is about specific equipment.\n\
\n\
Use analyze_gear_mods for deep mod analysis on a specific gear slot. Unlike \
get_item (which just shows mod text), analyze_gear_mods shows each mod's \
tier, roll quality, affix name, current vs max range, and whether upgrades \
are available at the item's level. Use this when the user asks about mod \
tiers, roll quality, crafting upgrades, or \"how good are my rolls\". \
Not applicable to unique items, flasks, or charms.\n\
\n\
Use get_passive_tree when the user asks about their passive tree, allocated \
nodes, keystones, notables, ascendancy choices, masteries, or jewel sockets. \
It returns all allocated nodes categorized by type.\n\
\n\
Use get_jewel to inspect a jewel socketed in a passive tree socket. First call \
get_passive_tree to get the jewel_sockets list with node IDs, then call \
get_jewel with the node_id to see the jewel's name, base, rarity, and mods.\n\
\n\
Use query_passive_stats to find how much of a specific stat comes from allocated \
passives and what's available nearby on the tree. Provide a stat pattern like \
\"fire damage\" or \"maximum life\". Optionally set radius (default 3) to control \
how far to search from current allocation.\n\
\n\
Use get_unallocated_ascendancy to see which ascendancy nodes the character has \
allocated and which are still available. Returns both primary and secondary \
ascendancy nodes with node names, types, and stats. Use this when recommending \
ascendancy choices or when the user asks what ascendancy nodes to take next.\n\
\n\
Use search_trade to look up item prices on the PoE2 trade site. You can search by \
item name, base type, category, rarity, and stat filters. Stat filters use human-readable \
names like \"maximum life\" or \"fire resistance\" — the tool resolves them to trade API IDs \
automatically. Use this when the user asks \"how much is X worth?\", \"what would an upgrade \
cost?\", or \"what's available on the market?\".\n\
Important search_trade rules:\n\
- The `type` parameter is the base type (e.g. \"Gold Ring\", \"Leather Vest\"). Never pass an \
empty string — either provide a real base type or omit the parameter entirely.\n\
- Do NOT use `max_price` unless the user explicitly asks for a budget or price cap. \
The price filter excludes items listed in other currencies, which hides most results \
and produces misleading prices.\n\
\n\
Use check_currency_price to check exchange rates between currencies. For example, \
\"how many chaos for an exalted?\" or \"what's the divine:chaos ratio?\". Common currency \
IDs: chaos, exalted, divine, regal, aug, transmute, vaal, chance, mirror.\n\
\n\
Be specific and reference actual numbers from the build data when relevant. \
If the data doesn't contain enough information to answer, say so.\n\
\n\
Path of Exile 2 differences from Path of Exile 1 — do NOT confuse these:\n\
- There are NO utility flasks. Players have 2 flask slots (life/mana style only).\n\
- Charms (3 slots) provide passive bonuses and trigger effects — they replace \
much of what utility flasks did in PoE1.\n\
- Spirit is a resource that reserves for persistent buffs, auras, and minions.\n\
- Gear does NOT have gem sockets. Skill gems are equipped independently in \
dedicated active-gem slots, each with support sockets.\n\
- Rune sockets on gear provide bonus stats (via socketed runes).\n\
- Do NOT reference PoE1-specific unique items, support gems, or league mechanics.\n\
- When recommending items, gems, or tree nodes, verify they exist using the \
available tools rather than relying on memory.\n\
\n\
Use search_gems to look up skill gems in the PoE2 database. You can search by \
name, filter by type (active or support), and filter by tags (projectile, fire, \
area, duration, etc.). Always use this tool instead of guessing gem names or tags \
from memory — PoE2 gems are different from PoE1.\n\
\n\
Use search_uniques to look up unique items in the PoE2 database. You can search by \
name or mod text, filter by equipment slot, and filter by level range. Always use this \
tool instead of guessing unique item names or stats from memory.\n\
\n\
Use list_charms to see all available charm bases in PoE2. Charms auto-activate when a \
condition is met (e.g. becoming Frozen) and provide a temporary buff. This returns all \
13 charm bases — no parameters needed. Use this when the user asks about charm options \
or what charms exist.\n\
\n\
Use search_runes to look up runes and soul cores in the PoE2 database. Runes give \
different bonuses depending on the equipment slot they're socketed into. You can search \
by name or stat text, and filter by equipment slot. Always use this tool instead of \
guessing rune names or effects from memory.\n\
\n\
**Theorycrafting workflow** for custom gear:\n\
1. `get_item` — see what's currently equipped in the slot\n\
2. `search_bases` — find valid base type names for the equipment slot\n\
3. `search_mods` — find valid mod text (use item type tag from the base's tags). \
The database shows ranges like `+(5-8) to Strength`; use a specific value like \
`+8 to Strength` in item text.\n\
4. `create_item` — construct item text with the exact base name and specific values \
within mod ranges. Check `matched_mods` and `unmatched_mods` in the response — \
unmatched mods have no stat effect and should be corrected.\n\
5. If `unmatched_mods` is non-empty, use `search_mods` to find the correct mod text \
and re-create the item.\n\
\n\
Always use `search_bases` and `search_mods` before creating items — do NOT guess base \
type names or mod text from memory, as PoE2 data is different from PoE1.";

/// A single turn from a prior conversation. Text only — no tool calls.
#[derive(Debug, Clone)]
pub struct ChatMessage {
    /// Speaker of the turn. `ToolAgent::respond` forwards only turns whose
    /// role is exactly `"user"` or `"assistant"`; turns with any other role
    /// are skipped without error.
    pub role: String,
    /// Plain-text content of the turn.
    pub content: String,
}

/// Events yielded by the agent during a response.
///
/// When the stream ends without an error, `ToolAgent::respond` closes it
/// with `Usage`, then at most one `BuildMutation`, then `Trace`, in that
/// order, after all `ToolCall` / `ToolResult` / `Token` events.
#[non_exhaustive]
pub enum AgentEvent {
    /// The agent is calling a tool (yields tool name for progress indication).
    /// All calls requested in one round are announced before any of them runs.
    ToolCall { name: String },
    /// A tool has returned a result (yields tool name and response size).
    /// `size_bytes` is the byte length of the text sent back to the model;
    /// failed tool runs are reported too, sized by their error payload.
    ToolResult { name: String, size_bytes: usize },
    /// A token of the final streamed response.
    /// Text deltas are forwarded as they arrive from any round, so text the
    /// model emits alongside tool calls is also delivered through this variant.
    Token(String),
    /// Cumulative token usage across all LLM calls for this response.
    Usage(Usage),
    /// The agent has produced a build mutation. Emitted after the final response.
    /// If several tools return a mutation, only the most recent one is emitted.
    BuildMutation { xml: String, label: String },
    /// Complete reasoning trace for this response. Emitted last.
    Trace(AgentTrace),
}

/// Tool-calling build analysis agent.
///
/// Wraps an LLM client and a shared PoB parser. Each call to `respond`
/// runs a ReAct loop: the LLM decides which tools to call, the agent
/// executes them via the parser, and the results are fed back until
/// the LLM produces a final answer.
pub struct ToolAgent {
    /// Client used to open one streamed LLM response per round; cloned into
    /// every response stream.
    llm: ChatGptClient,
    /// Shared parser handed to each tool invocation through `ToolContext`.
    parser: Arc<PobParser>,
    /// Optional trade client. Whether it is present is passed to
    /// `ToolRegistry::new` — presumably this toggles the trade-related
    /// tools; confirm in the `tools` module.
    trade: Option<Arc<TradeClient>>,
}

impl ToolAgent {
    /// Create an agent from an LLM client, a shared PoB parser, and an
    /// optional trade client.
    ///
    /// The trade client, when given, is wrapped in an `Arc` so every response
    /// stream can hold its own handle to it.
    pub fn new(llm: ChatGptClient, parser: Arc<PobParser>, trade: Option<TradeClient>) -> Self {
        Self {
            llm,
            parser,
            trade: trade.map(Arc::new),
        }
    }

    /// Stream a response to a user question about the given build.
    ///
    /// `build_xml` is the raw PoB XML export; it is copied once and handed to
    /// every tool invocation through `ToolContext`. `message` is the new user
    /// turn. `history` holds prior turns; only `"user"` and `"assistant"`
    /// roles are forwarded, anything else is skipped.
    ///
    /// The returned stream owns all of its data and does not borrow `self`.
    /// It runs at most `MAX_TOOL_ROUNDS` LLM rounds. A round that requests no
    /// tool calls ends the loop. The stream always closes with `Usage`, an
    /// optional `BuildMutation`, and `Trace`, in that order.
    ///
    /// # Errors
    ///
    /// An error event from the LLM stream is yielded as `Err(LlmError)` and
    /// ends the stream. Tool failures are NOT stream errors: they are
    /// serialized as a JSON `{"error": ...}` payload and fed back to the model.
    pub fn respond(
        &self,
        build_xml: &[u8],
        message: &str,
        history: Vec<ChatMessage>,
    ) -> impl Stream<Item = Result<AgentEvent, LlmError>> + Send {
        let llm = self.llm.clone();
        let parser = Arc::clone(&self.parser);
        let trade = self.trade.clone();
        let build_xml = build_xml.to_vec();
        let message = message.to_owned();

        async_stream::try_stream! {
            let registry = ToolRegistry::new(trade.is_some());
            let tools = registry.definitions();

            // Build the LLM input and the trace history in one pass over the
            // prior conversation, consuming it so no strings are cloned.
            let mut input: Vec<serde_json::Value> = Vec::with_capacity(history.len() + 1);
            let mut trace_history: Vec<TraceMessage> = Vec::with_capacity(history.len());
            for msg in history {
                if matches!(msg.role.as_str(), "user" | "assistant") {
                    input.push(input_message(&msg.role, &msg.content));
                    trace_history.push(TraceMessage {
                        role: msg.role,
                        content: msg.content,
                    });
                }
            }
            input.push(input_message("user", &message));

            let mut trace = TraceBuilder::new(&message, trace_history);

            // Unified streaming loop with response chaining.
            // Every round streams. Text deltas yield to the user as they arrive.
            // Function calls are collected and executed after the round completes.
            // When a round produces text instead of tool calls, we're done.
            let mut pending_mutation: Option<BuildMutation> = None;
            let mut previous_response_id: Option<String> = None;
            let mut cumulative_usage = Usage::default();
            let mut answered = false;

            for _ in 0..MAX_TOOL_ROUNDS {
                // `previous_response_id` carries the conversation items, but the
                // Responses API does not carry `instructions` (or tools) over
                // from the previous response, so both are re-sent every round.
                // On chained rounds `input` holds only the new tool results.
                let stream = llm.create_response_stream(
                    &input[..],
                    Some(SYSTEM_PROMPT),
                    Some(&tools[..]),
                    previous_response_id.as_deref(),
                );
                tokio::pin!(stream);

                let mut function_calls = Vec::new();
                while let Some(event) = futures_lite::StreamExt::next(&mut stream).await {
                    match event? {
                        ResponseStreamEvent::TextDelta(t) => {
                            trace.text_delta(&t);
                            yield AgentEvent::Token(t);
                        }
                        ResponseStreamEvent::FunctionCall(fc) => {
                            function_calls.push(fc);
                        }
                        ResponseStreamEvent::ResponseCompleted { id, usage } => {
                            previous_response_id = Some(id);
                            if let Some(u) = usage { cumulative_usage += u; }
                        }
                    }
                }

                if function_calls.is_empty() {
                    // Model produced text (already streamed) — done.
                    answered = true;
                    break;
                }

                // Announce every requested call before running any of them.
                for fc in &function_calls {
                    tracing::debug!(
                        tool = %fc.name,
                        arguments = %fc.arguments,
                        "tool call"
                    );
                    trace.tool_call(&fc.name, &fc.arguments);
                    yield AgentEvent::ToolCall { name: fc.name.clone() };
                }

                let mut tool_results = Vec::with_capacity(function_calls.len());
                for fc in &function_calls {
                    let ctx = ToolContext {
                        parser: &parser,
                        build_xml: &build_xml,
                        trade: trade.as_deref(),
                    };
                    let result = registry.execute(&ctx, &fc.name, &fc.arguments).await;
                    let content = match result {
                        Ok(tool_result) => {
                            // Last mutation wins if several tools produce one.
                            if let Some(m) = tool_result.mutation {
                                pending_mutation = Some(m);
                            }
                            tool_result.response.to_string()
                        }
                        // Serialize through serde_json so quotes, backslashes,
                        // and newlines in the error text are escaped and the
                        // model always receives well-formed JSON.
                        Err(e) => serde_json::json!({ "error": e.to_string() }).to_string(),
                    };
                    tracing::debug!(
                        tool = %fc.name,
                        result_bytes = content.len(),
                        result = %content.chars().take(1000).collect::<String>(),
                        "tool result"
                    );
                    trace.tool_result(&fc.name, &content);
                    yield AgentEvent::ToolResult {
                        name: fc.name.clone(),
                        size_bytes: content.len(),
                    };
                    tool_results.push(input_function_call_output(&fc.call_id, &content));
                }

                // Next round: only tool results (server has the rest via chain).
                input = tool_results;
            }

            if !answered {
                // The last round still asked for tools, so the model never got
                // to turn its final tool results into an answer.
                tracing::warn!(
                    max_rounds = MAX_TOOL_ROUNDS,
                    "agent exhausted tool rounds without a final answer"
                );
            }

            // Shared tail for both outcomes: usage, optional mutation, trace.
            tracing::info!(
                input_tokens = cumulative_usage.input_tokens,
                output_tokens = cumulative_usage.output_tokens,
                cached_tokens = cumulative_usage.cached_tokens(),
                total_tokens = cumulative_usage.total_tokens,
                "agent response complete"
            );
            yield AgentEvent::Usage(cumulative_usage);
            if let Some(m) = pending_mutation {
                trace.build_mutation(&m.label);
                yield AgentEvent::BuildMutation { xml: m.xml, label: m.label };
            }
            yield AgentEvent::Trace(trace.finish(&cumulative_usage));
        }
    }
}