// clawgarden-agent 0.7.6

//! LLM API bridge — multi-backend, OpenAI-compatible chat completions.
//!
//! Supports any OpenAI-compatible API (MiniMAX, OpenAI, DeepSeek, etc.)
//! via environment variables. The LLM sees the conversation with speaker
//! attribution and decides whether to respond or stay silent.

use anyhow::{Context, Result};
use rand::Rng;
use reqwest::Client;
use serde::{Deserialize, Serialize};
use std::sync::OnceLock;
use std::time::Duration;

use clawgarden_proto::config::AppConfig;

// ── LLM Backend Configuration ─────────────────────────────────────────
//
// All settings come from AppConfig (centralized env var loading).
// See clawgarden-proto/src/config.rs for all configurable env vars.

/// Resolved LLM configuration (loaded once from AppConfig)
///
/// A flattened copy of the `llm` and `agent` sections of `AppConfig`, holding only the
/// values this module reads when talking to the chat-completions endpoint.
struct ResolvedLlmConfig {
    /// Base URL of the API; `/chat/completions` is appended to form the request URL.
    api_base: String,
    /// Secret sent as the `Authorization: Bearer …` header; empty means "not configured".
    api_key: String,
    /// Model identifier placed in the `model` field of every request.
    model: String,
    /// HTTP client timeout, in milliseconds, used by `call_llm`.
    timeout_ms: u64,
    /// `max_tokens` sent by `call_llm`.
    response_max_tokens: u32,
    /// `temperature` sent by `call_llm`.
    response_temperature: f32,
    /// Exclusive upper bound, in milliseconds, of the random delay in `judge_and_respond`.
    jitter_ms: u64,
    /// `max_tokens` sent by `should_i_respond`.
    selection_max_tokens: u32,
    /// `temperature` sent by `should_i_respond`.
    selection_temperature: f32,
}

/// Process-wide cache of the resolved configuration; filled on first use.
static LLM_CONFIG: OnceLock<ResolvedLlmConfig> = OnceLock::new();

/// Returns the cached LLM configuration, resolving it from `AppConfig` on the first call.
///
/// # Errors
/// Fails when the resolved configuration has an empty API key. The configuration is still
/// cached in that case, so later calls keep returning the same error.
fn llm_config() -> Result<&'static ResolvedLlmConfig> {
    let resolved = LLM_CONFIG.get_or_init(|| {
        let app = AppConfig::load();
        let (llm, agent) = (&app.llm, &app.agent);
        ResolvedLlmConfig {
            // Connection settings
            api_base: llm.api_base.clone(),
            api_key: llm.api_key.clone(),
            model: llm.model.clone(),
            timeout_ms: llm.timeout_ms,
            // Sampling settings for the main response call
            response_max_tokens: agent.response_max_tokens,
            response_temperature: agent.response_temperature,
            jitter_ms: agent.jitter_ms,
            // Sampling settings for the self-selection call
            selection_max_tokens: agent.selection_max_tokens,
            selection_temperature: agent.selection_temperature,
        }
    });
    resolved.as_result()
}

impl ResolvedLlmConfig {
    /// Validates that an API key is present.
    ///
    /// Returns `self` unchanged when the key is non-empty, otherwise an error naming the
    /// environment variables that supply it.
    fn as_result(&self) -> Result<&Self> {
        if !self.api_key.is_empty() {
            return Ok(self);
        }
        anyhow::bail!("LLM_API_KEY (or ZAI_API_KEY) not set");
    }
}

// ── OpenAI-compatible types ───────────────────────────────────────────────

/// Outgoing chat message (serialize-only) used in the `messages` array of a `ChatRequest`.
#[derive(Debug, Serialize)]
struct SerChatMessage {
    /// Speaker role; this file sends `"system"` and `"user"`.
    role: String,
    /// Plain-text body of the message.
    content: String,
}

/// Request body for the `/chat/completions` endpoint, as sent by `should_i_respond`.
#[derive(Debug, Serialize)]
struct ChatRequest {
    /// Model identifier.
    model: String,
    /// Conversation to complete, in order.
    messages: Vec<SerChatMessage>,
    /// Upper bound on generated tokens.
    max_tokens: u32,
    /// Sampling temperature.
    temperature: f32,
    /// Tool definitions; the key is left out of the JSON entirely when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    tools: Option<Vec<serde_json::Value>>,
    /// Tool selection mode; the key is left out of the JSON entirely when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_choice: Option<String>,
}

/// Assistant message as it appears inside a chat-completions choice.
#[derive(Debug, Deserialize)]
struct ChatMessage {
    #[allow(dead_code)]
    role: String,
    /// Text of the reply.
    ///
    /// Kept as an `Option` because providers send an explicit `"content": null` when the
    /// message carries `tool_calls`. `#[serde(default)]` only covers a *missing* key, so a
    /// plain `String` here would reject those replies at deserialization time.
    #[serde(default)]
    content: Option<String>,
    /// Reasoning text, read only when `content` is absent or empty.
    #[serde(default)]
    reasoning_content: Option<String>,
    /// Raw tool-call objects, kept untyped so the response parser can interpret them.
    #[serde(default)]
    tool_calls: Option<Vec<serde_json::Value>>,
}

impl ChatMessage {
    /// Returns the text the model produced.
    ///
    /// Non-empty `content` is returned verbatim. Otherwise the trimmed `reasoning_content`
    /// is used, and an empty string is returned when neither is available.
    fn get_response(&self) -> String {
        match self.content.as_deref() {
            Some(content) if !content.is_empty() => content.to_string(),
            _ => self
                .reasoning_content
                .as_deref()
                .map(|reasoning| reasoning.trim().to_string())
                .unwrap_or_default(),
        }
    }
}

/// Top-level chat-completions response; only the `choices` array is read.
#[derive(Debug, Deserialize)]
struct ChatResponse {
    /// Candidate completions; this module only ever looks at the first one.
    choices: Vec<ChatChoice>,
}

/// One entry of the `choices` array in a `ChatResponse`.
#[derive(Debug, Deserialize)]
struct ChatChoice {
    /// The assistant message carried by this choice.
    message: ChatMessage,
}

/// Returns the tool definitions passed to Function Calling.
///
/// Four tools are declared: `respond`, `exec`, `read_file` and `create_skill`.
/// The list is built on first use and cached for the rest of the process lifetime.
fn build_tools_schema() -> &'static Vec<serde_json::Value> {
    use std::sync::OnceLock;

    /// Wraps one function description in the `{"type": "function", ...}` envelope.
    fn function_tool(
        name: &str,
        description: &str,
        parameters: serde_json::Value,
    ) -> serde_json::Value {
        serde_json::json!({
            "type": "function",
            "function": {
                "name": name,
                "description": description,
                "parameters": parameters
            }
        })
    }

    static TOOLS: OnceLock<Vec<serde_json::Value>> = OnceLock::new();

    TOOLS.get_or_init(|| {
        let respond = function_tool(
            "respond",
            "Write a response or choose silence. Put the response in 'text', or set 'silent' to true if not responding.",
            serde_json::json!({
                "type": "object",
                "properties": {
                    "text": {
                        "type": "string",
                        "description": "Response text for group chat"
                    },
                    "silent": {
                        "type": "boolean",
                        "description": "True if not responding"
                    }
                }
            }),
        );

        let exec = function_tool(
            "exec",
            "Execute a local command. Use when invoking CLI tools per skill instructions. Returns stdout + stderr.",
            serde_json::json!({
                "type": "object",
                "properties": {
                    "command": {
                        "type": "string",
                        "description": "Shell command to execute"
                    },
                    "workdir": {
                        "type": "string",
                        "description": "Working directory (absolute path). Defaults to /workspace"
                    },
                    "timeout": {
                        "type": "number",
                        "description": "Timeout in seconds. Defaults to 30"
                    }
                },
                "required": ["command"]
            }),
        );

        let read_file = function_tool(
            "read_file",
            "Read contents of a local file. Use for reading skill instruction files or reference documents.",
            serde_json::json!({
                "type": "object",
                "properties": {
                    "path": {
                        "type": "string",
                        "description": "Absolute path of the file to read"
                    }
                },
                "required": ["path"]
            }),
        );

        let create_skill = function_tool(
            "create_skill",
            "Create a new skill. Only use when a task is not covered by existing skills.",
            serde_json::json!({
                "type": "object",
                "properties": {
                    "skill_name": {
                        "type": "string",
                        "description": "New skill name"
                    },
                    "description": {
                        "type": "string",
                        "description": "One-line description"
                    },
                    "body": {
                        "type": "string",
                        "description": "Skill body in markdown format"
                    }
                },
                "required": ["skill_name", "description", "body"]
            }),
        );

        vec![respond, exec, read_file, create_skill]
    })
}

// ── Public API ────────────────────────────────────────────────────────────

/// LLM response — action + tool_call metadata
///
/// Produced by `parse_llm_response`. The two `tool_call_*` fields are filled from the first
/// tool call of the reply and are `None` when the action came from the text fallback.
#[derive(Debug, Clone)]
pub struct LlmResponse {
    /// Parsed action
    pub action: crate::response_parser::ToolCallAction,
    /// LLM-returned tool_call ID (if tool call); taken from the first tool call's `id`
    pub tool_call_id: Option<String>,
    /// Raw LLM-returned tool_call arguments; the first tool call's `function.arguments` string
    pub tool_call_arguments: Option<String>,
}

/// LLM call for the agent loop.
///
/// `messages` accumulates user + assistant_tool_call + tool_result entries.
/// Uses the OpenAI Chat Completions API multi-turn tool call pattern:
///
/// ```text
/// messages = [
///   { role: "user",      content: "Search for main" },
///   { role: "assistant", content: null, tool_calls: [{...}] },
///   { role: "tool",      tool_call_id: "...", content: "main.rs:42" },
///   { role: "assistant", content: null, tool_calls: [{...}] },
///   ...
/// ]
/// ```
///
/// The system prompt is built from `agent_name`, `persona`, `memory`, `skills` and `force`.
/// When `history` is non-empty it is folded into the first user message, unless that
/// message already contains a "Recent conversation:" section.
///
/// Returns `Ok(None)` when the reply contains no usable choice or text.
///
/// # Errors
/// Fails when the API key is not configured, when the request cannot be sent, when the
/// provider answers with a non-retryable status (or retries are exhausted), or when the
/// response body cannot be parsed.
pub async fn call_llm(
    agent_name: &str,
    persona: &str,
    memory: &str,
    skills: &str,
    messages: &[crate::agent_loop::ChatMessage],
    history: &[String],
    force: bool,
) -> Result<Option<LlmResponse>> {
    /// Total number of requests allowed before giving up.
    const MAX_ATTEMPTS: u64 = 3;

    let cfg = llm_config()?;

    let system = build_system_prompt(agent_name, persona, memory, skills, force);

    // System message first, then the conversation (initial user + tool results) in order.
    let mut api_messages = Vec::with_capacity(messages.len() + 1);
    api_messages.push(serde_json::json!({
        "role": "system",
        "content": system,
    }));
    api_messages.extend(messages.iter().map(|msg| msg.to_api_message()));

    // Prepend history before the first user message.
    // Preserved across repeated calls within the loop.
    if !history.is_empty() && !messages.is_empty() {
        if let Some(first) = api_messages.get_mut(1) {
            if first.get("role").and_then(|r| r.as_str()) == Some("user") {
                let original_content = first
                    .get("content")
                    .and_then(|c| c.as_str())
                    .unwrap_or("")
                    .to_string();
                // Only prepend if history has not already been formatted
                if !original_content.contains("Recent conversation:") {
                    let formatted = build_user_message(&original_content, history);
                    *first = serde_json::json!({
                        "role": "user",
                        "content": formatted,
                    });
                }
            }
        }
    }

    let client = Client::builder()
        .timeout(Duration::from_millis(cfg.timeout_ms))
        .build()?;

    // Tolerate a configured base URL that ends in '/', which would otherwise
    // produce ".../v1//chat/completions".
    let url = format!("{}/chat/completions", cfg.api_base.trim_end_matches('/'));

    // The body is identical across retries, so build it once. Tools are included up front
    // and stripped only if the provider rejects them.
    let mut req_body = serde_json::json!({
        "model": cfg.model,
        "messages": api_messages,
        "max_tokens": cfg.response_max_tokens,
        "temperature": cfg.response_temperature,
        "tools": build_tools_schema(),
        "tool_choice": "auto",
    });

    let mut tools_enabled = true;
    let mut attempts: u64 = 0;

    let chat = loop {
        attempts += 1;

        let resp = client
            .post(&url)
            .header("Authorization", format!("Bearer {}", cfg.api_key))
            .header("Content-Type", "application/json")
            .json(&req_body)
            .send()
            .await;

        match resp {
            Ok(r) if r.status().is_success() => {
                break r.json::<ChatResponse>().await.context("LLM parse failed")?;
            }
            Ok(r) => {
                let status = r.status();
                let body = r.text().await.unwrap_or_default();

                // Some providers do not support function calling: retry once as plain chat.
                if status.as_u16() == 400 && tools_enabled && body.contains("tool") {
                    log::warn!(
                        "Provider rejected tools, retrying without ({}): {}",
                        status,
                        body
                    );
                    tools_enabled = false;
                    if let Some(obj) = req_body.as_object_mut() {
                        obj.remove("tools");
                        obj.remove("tool_choice");
                    }
                    continue;
                }

                // Rate limiting and server errors are retried with a linear backoff.
                if (status.as_u16() == 429 || status.is_server_error()) && attempts < MAX_ATTEMPTS {
                    let backoff_ms = attempts * 1000;
                    log::warn!(
                        "LLM {} (attempt {}/{}), retrying in {}ms",
                        status,
                        attempts,
                        MAX_ATTEMPTS,
                        backoff_ms
                    );
                    tokio::time::sleep(Duration::from_millis(backoff_ms)).await;
                    continue;
                }

                anyhow::bail!("LLM error {}: {}", status, body);
            }
            Err(e) => anyhow::bail!("LLM request failed: {}", e),
        }
    };

    parse_llm_response(&chat)
}

/// Common parser to extract LlmResponse from LLM response.
fn parse_llm_response(chat: &ChatResponse) -> Result<Option<LlmResponse>> {
    let msg = match chat.choices.first() {
        Some(c) => &c.message,
        None => return Ok(None),
    };

    // Extract tool_call_id from the first tool_call
    let tool_call_id = msg.tool_calls.as_ref().and_then(|tc| {
        tc.first().and_then(|t| {
            t.get("id")
                .and_then(|id| id.as_str())
                .map(|s| s.to_string())
        })
    });
    let tool_call_arguments = msg.tool_calls.as_ref().and_then(|tc| {
        tc.first().and_then(|t| {
            t.get("function")
                .and_then(|f| f.get("arguments"))
                .and_then(|a| a.as_str())
                .map(|s| s.to_string())
        })
    });

    // 1. Function Calling result takes priority
    if let Some(ref tool_calls) = msg.tool_calls {
        if !tool_calls.is_empty() {
            if tool_calls.len() > 1 {
                log::warn!(
                    "LLM returned {} tool_calls, only processing first",
                    tool_calls.len()
                );
            }
            if let Some(action) =
                crate::response_parser::ResponseParser::parse_tool_calls(tool_calls)
            {
                return Ok(Some(LlmResponse {
                    action,
                    tool_call_id,
                    tool_call_arguments,
                }));
            }
            // Tool name/arguments couldn't be parsed — use text response if available
            let text = msg.get_response();
            if !text.is_empty() {
                let action = crate::response_parser::ResponseParser::text_fallback(&text);
                return Ok(Some(LlmResponse {
                    action,
                    tool_call_id: None,
                    tool_call_arguments: None,
                }));
            }
            return Ok(None);
        }
    }

    // 2. Text fallback (Function Calling not used or not available)
    let content = msg.get_response();
    if content.is_empty() {
        return Ok(None);
    }

    let action = crate::response_parser::ResponseParser::text_fallback(&content);
    Ok(Some(LlmResponse {
        action,
        tool_call_id: None,
        tool_call_arguments: None,
    }))
}

/// Combined judge + response via single LLM call.
///
/// `message` is the formatted message with speaker attribution.
/// `history` is recent conversation lines.
/// `force` = true means this is a fallback — nobody else responded, so be more willing to speak.
///
/// Before calling the LLM, sleeps for a random delay in `0..jitter_ms` to stagger concurrent
/// callers. A configured jitter of `0` disables the delay.
///
/// Returns `None` if the agent should stay silent, `Some(ToolCallAction)` if responding.
///
/// # Errors
/// Propagates every error from `call_llm()`, including a missing API key.
///
/// **Deprecated**: Use `call_llm()` with `agent_loop` instead.
#[deprecated(note = "Use call_llm() with agent_loop instead")]
pub async fn judge_and_respond(
    agent_name: &str,
    persona: &str,
    memory: &str,
    skills: &str,
    message: &str,
    history: &[String],
    force: bool,
) -> Result<Option<crate::response_parser::ToolCallAction>> {
    let cfg = llm_config()?;

    // Small random jitter to stagger concurrent LLM calls.
    // `gen_range` panics on an empty range, so a jitter of 0 must skip the draw entirely.
    if cfg.jitter_ms > 0 {
        let jitter = rand::thread_rng().gen_range(0..cfg.jitter_ms);
        tokio::time::sleep(Duration::from_millis(jitter)).await;
    }

    // Build messages from a single user message
    let messages = vec![crate::agent_loop::ChatMessage::user(message)];

    call_llm(
        agent_name, persona, memory, skills, &messages, history, force,
    )
    .await
    .map(|opt| opt.map(|r| r.action))
}

// ── Prompt Engineering ────────────────────────────────────────────────────

/// Assembles the system prompt for the main response call.
///
/// Sections, in order:
/// 1. Identity — `persona` if non-empty, otherwise "You are {agent_name}."
/// 2. The bot's own Telegram username, from `TELEGRAM_BOT_USERNAME` when set.
/// 3. Group members, from `TEAM_MEMBERS` (comma-separated `name:@username` entries). When
///    the variable is unset, empty, or holds no non-blank entry, a generic line is used
///    so the prompt never refers to a list that is not there.
/// 4. `memory` and `skills`, each only when non-empty.
/// 5. The fixed group-chat rules, plus a "do not stay silent" addendum when `force` is true.
fn build_system_prompt(
    agent_name: &str,
    persona: &str,
    memory: &str,
    skills: &str,
    force: bool,
) -> String {
    const GROUP_CHAT_RULES: &str = r#"You are an AI member in a group chat.

Rules:
1. Respond when: the message is relevant to your expertise, someone addresses you, or it's a greeting/question directed at the group.
2. Do NOT respond when: someone already gave a good answer, you'd add nothing new, or it's a trivial acknowledgment.
3. When responding: be concise (1-3 sentences), match the user's language, be natural. Never prefix with your own name.
4. Use the provided functions to act: call respond(text="...") to reply, or respond(silent=true) to stay silent.

Remember: quality over quantity. If someone already answered well and you have nothing new to add, stay silent."#;

    let mut p = String::new();

    // ── Identity ──
    if !persona.is_empty() {
        p.push_str(persona);
        p.push_str("\n\n");
    } else {
        p.push_str(&format!("You are {}.\n\n", agent_name));
    }

    // ── My Telegram username ──
    if let Ok(username) = std::env::var("TELEGRAM_BOT_USERNAME") {
        p.push_str(&format!("Your Telegram username: @{}\n", username));
    }

    // ── Team members (comma-separated: "name1:@username1,name2:@username2,...") ──
    // This tells the LLM exactly how to refer to each member.
    let members = std::env::var("TEAM_MEMBERS").unwrap_or_default();
    let entries: Vec<&str> = members
        .split(',')
        .map(str::trim)
        .filter(|entry| !entry.is_empty())
        .collect();
    if entries.is_empty() {
        // Unset, empty, or separators only: fall back to the generic description.
        p.push_str("\nGroup members: other AI members (names shown in conversation).\n");
    } else {
        p.push_str("\nGroup members (name: @username):\n");
        for entry in entries {
            p.push_str(&format!("- {}\n", entry));
        }
    }
    p.push_str("When addressing a member, use their exact @username from the list. Never invent usernames.\n\n");

    // ── Memory ──
    if !memory.is_empty() {
        p.push_str("[Things you remember]\n");
        p.push_str(memory);
        p.push_str("\n\n");
    }

    // ── Skills ──
    if !skills.is_empty() {
        p.push_str(skills);
        p.push_str(
            "Skill usage:\n\
   - Skills are tool usage guides. Each skill teaches you how to use a CLI or tool.\n\
   - When a task matches a skill, call read_file to load the skill instructions.\n\
   - Then use exec to run commands as described in the skill.\n\
   - You can also run commands directly with exec without loading a skill.\n\
   - To create a new skill, use the create_skill function.\n\n",
        );
    }

    // ── Group chat rules ──
    p.push_str(GROUP_CHAT_RULES);

    if force {
        p.push_str(
            "\n\nIMPORTANT: No other member responded to this message.\nYou should respond briefly. If it's a greeting, greet back. If it's a question, answer it.\nDo NOT stay silent."
        );
    }

    p
}

/// Formats the user turn: optional conversation transcript, the current message, and the
/// closing question that asks the model to decide whether to reply.
///
/// `history` lines already carry speaker attribution and have been selected within the
/// token budget by ContextManager, so they are emitted as-is, one per line.
fn build_user_message(message: &str, history: &[String]) -> String {
    const CLOSING: &str = "\n\nShould you respond? Use the respond function with your reply, or respond(silent=true) if you have nothing to add.";

    // The transcript section is omitted entirely when there is no history.
    let transcript = if history.is_empty() {
        String::new()
    } else {
        format!("Recent conversation:\n{}\n\n", history.join("\n"))
    };

    format!("{}Current message:\n{}{}", transcript, message, CLOSING)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// History lines and the current message both end up in the user prompt.
    #[test]
    fn test_build_user_message() {
        let msg = "[User]: Hello";
        let history = vec![
            "[camus]: Nice to meet you".to_string(),
            "[eleven]: Welcome".to_string(),
        ];
        let result = build_user_message(msg, &history);
        assert!(result.contains("Recent conversation"));
        assert!(result.contains("[camus]"));
        assert!(result.contains("[User]"));
    }

    /// Without history the transcript section is omitted altogether.
    #[test]
    fn test_build_user_message_without_history() {
        let result = build_user_message("[User]: Hello", &[]);
        assert!(!result.contains("Recent conversation"));
        assert!(result.starts_with("Current message:\n[User]: Hello"));
    }

    /// The base prompt mentions the tool functions and a group-members section.
    #[test]
    fn test_system_prompt_has_rules() {
        let p = build_system_prompt("eleven", "", "", "", false);
        assert!(p.contains("respond") || p.contains("exec"));
        assert!(p.contains("Group members"));
    }

    /// The "do not stay silent" addendum appears only when `force` is set.
    #[test]
    fn test_force_prompt() {
        let p = build_system_prompt("eleven", "", "", "", true);
        assert!(p.contains("No other member responded"));
        let p2 = build_system_prompt("eleven", "", "", "", false);
        assert!(!p2.contains("No other member responded"));
    }

    /// The schema declares exactly the four supported tools.
    #[test]
    fn test_tools_schema_has_four_tools() {
        let tools = build_tools_schema();
        assert_eq!(tools.len(), 4);
        let names: Vec<&str> = tools
            .iter()
            .filter_map(|t| t.get("function")?.get("name")?.as_str())
            .collect();
        assert!(names.contains(&"respond"));
        assert!(names.contains(&"exec"));
        assert!(names.contains(&"read_file"));
        assert!(names.contains(&"create_skill"));
    }
}

// ── Turn-based self-selection ──────────────────────────────────────────

/// Result from the "should I respond?" LLM call.
///
/// Deserialized from the JSON object the model is asked to reply with. When the reply is not
/// parseable, `should_i_respond` substitutes `respond: false`, `confidence: 0.0` and a
/// `reason` describing the failure.
#[derive(Debug, Clone, Deserialize)]
pub struct ShouldRespondResult {
    /// Whether the agent should reply in this round.
    pub respond: bool,
    /// Self-reported confidence; the prompt asks for a value between 0.0 and 1.0.
    pub confidence: f32,
    /// Short explanation from the model, or a parse-failure note.
    pub reason: String,
}

/// Returns the span from the first `{` to the last `}` of `text`, if the two form a
/// non-empty, correctly ordered pair. Used to pull a JSON object out of surrounding prose
/// or markdown fences.
fn extract_json_object(text: &str) -> Option<&str> {
    let start = text.find('{')?;
    let end = text.rfind('}')?;
    // Both delimiters are ASCII, so the byte offsets are valid slice boundaries.
    // A closing brace that precedes the opening one would make the slice panic.
    (end > start).then(|| &text[start..=end])
}

/// Ask the LLM whether this agent should respond to the current turn.
/// Uses a lightweight call: `max_tokens` and `temperature` come from the configured
/// `selection_max_tokens` / `selection_temperature`, with a fixed 5-second HTTP timeout.
/// The LLM sees the conversation history and any previous responses from
/// this round, then decides if it has something unique to contribute.
///
/// A reply that contains no JSON object, or one that does not match `ShouldRespondResult`,
/// is not an error: it yields `respond: false` with `confidence: 0.0`.
///
/// # Errors
/// Fails when the API key is not configured, when the request cannot be sent, when the
/// provider answers with a non-retryable status (or retries are exhausted), or when the
/// HTTP body is not a valid chat-completions response.
pub async fn should_i_respond(
    agent_name: &str,
    persona: &str,
    history: &[String],
    user_message: &str,
    previous_responses: &[String],
) -> Result<ShouldRespondResult> {
    /// Total number of requests allowed before giving up.
    const MAX_ATTEMPTS: u64 = 3;

    let cfg = llm_config()?;

    let system = build_selection_prompt(agent_name, persona);
    let user = build_selection_user_message(user_message, history, previous_responses);

    let request = ChatRequest {
        model: cfg.model.clone(),
        messages: vec![
            SerChatMessage {
                role: "system".into(),
                content: system,
            },
            SerChatMessage {
                role: "user".into(),
                content: user,
            },
        ],
        max_tokens: cfg.selection_max_tokens,
        temperature: cfg.selection_temperature,
        tools: None,
        tool_choice: None,
    };

    let client = Client::builder().timeout(Duration::from_secs(5)).build()?;

    // Tolerate a configured base URL that ends in '/'.
    let url = format!("{}/chat/completions", cfg.api_base.trim_end_matches('/'));

    // Retry up to 2 times on transient errors
    let mut attempts: u64 = 0;
    let resp = loop {
        attempts += 1;
        let r = client
            .post(&url)
            .header("Authorization", format!("Bearer {}", cfg.api_key))
            .header("Content-Type", "application/json")
            .json(&request)
            .send()
            .await;

        match r {
            Ok(r) if r.status().is_success() => break r,
            Ok(r) => {
                let status = r.status();
                let body = r.text().await.unwrap_or_default();
                if (status.as_u16() == 429 || status.is_server_error()) && attempts < MAX_ATTEMPTS {
                    log::warn!(
                        "should_i_respond {} (attempt {}/{}), retrying",
                        status,
                        attempts,
                        MAX_ATTEMPTS
                    );
                    tokio::time::sleep(Duration::from_millis(attempts * 500)).await;
                    continue;
                }
                anyhow::bail!("should_i_respond LLM error {}: {}", status, body);
            }
            Err(e) => anyhow::bail!("should_i_respond request failed: {}", e),
        }
    };

    let chat: ChatResponse = resp
        .json()
        .await
        .context("Failed to parse should_i_respond response")?;

    let content = chat
        .choices
        .first()
        .map(|c| c.message.get_response())
        .unwrap_or_default();

    let content = crate::response_parser::ResponseParser::strip_thinking(&content);
    let trimmed = content.trim();

    // Try to extract JSON from the response (LLM might wrap it in markdown or add prose)
    let Some(json_str) = extract_json_object(trimmed) else {
        // Not JSON — treat as "no response"
        log::warn!("should_i_respond got non-JSON: {}", trimmed);
        return Ok(ShouldRespondResult {
            respond: false,
            confidence: 0.0,
            reason: "non-JSON response".into(),
        });
    };

    match serde_json::from_str::<ShouldRespondResult>(json_str) {
        Ok(result) => Ok(result),
        Err(e) => {
            log::warn!("should_i_respond parse error: {} — raw: {}", e, json_str);
            Ok(ShouldRespondResult {
                respond: false,
                confidence: 0.0,
                reason: format!("Parse error: {}", e),
            })
        }
    }
}

/// Builds the system prompt for the self-selection call.
///
/// Opens with `persona` (or "You are {agent_name}." when the persona is empty), followed by
/// a blank line and the fixed criteria that end by demanding a JSON-only answer.
fn build_selection_prompt(agent_name: &str, persona: &str) -> String {
    const CRITERIA: &str = r#"You are a member of a group chat. Decide whether YOU should respond.

Respond ONLY if:
- You have NEW information or a DIFFERENT perspective not yet mentioned
- Someone said something you can directly build on from your expertise
- The topic is directly in your area of specialization

Do NOT respond if:
- Someone already gave a good answer
- You would only repeat or agree with what was said
- The topic is outside your expertise
- The user is clearly telling the group to stop or be quiet

Reply with JSON only:
{"respond": true/false, "confidence": 0.0-1.0, "reason": "brief explanation"}"#;

    let identity = if persona.is_empty() {
        format!("You are {}.", agent_name)
    } else {
        persona.to_string()
    };

    format!("{}\n\n{}", identity, CRITERIA)
}

/// Builds the user turn for the self-selection call.
///
/// Up to three sections are concatenated: the recent conversation (when `history` is
/// non-empty), the replies already given this round (when `previous_responses` is
/// non-empty, each followed by a `---` separator line), and finally the user's message
/// with the JSON-only question.
fn build_selection_user_message(
    user_message: &str,
    history: &[String],
    previous_responses: &[String],
) -> String {
    let mut sections: Vec<String> = Vec::with_capacity(3);

    if !history.is_empty() {
        sections.push(format!("Recent conversation:\n{}\n\n", history.join("\n")));
    }

    if !previous_responses.is_empty() {
        let replies: String = previous_responses
            .iter()
            .map(|reply| format!("{}\n---\n", reply))
            .collect();
        sections.push(format!(
            "Team members who already responded in this round:\n{}\n",
            replies
        ));
    }

    sections.push(format!(
        "User's message: {}\n\nShould you respond? JSON only.",
        user_message
    ));

    sections.concat()
}