clawgarden-agent 0.4.0

//! LLM API bridge — single call for natural group-chat judge + response.
//!
//! The LLM sees the conversation with speaker attribution and decides
//! whether to respond or stay silent, just like a real group chat.

use anyhow::{Context, Result};
use clawgarden_proto::MessagePayload;
use rand::Rng;
use reqwest::Client;
use serde::{Deserialize, Serialize};
use std::time::Duration;

/// Timeout, in milliseconds, applied to the HTTP client used for LLM API calls.
const LLM_TIMEOUT_MS: u64 = 30_000;

/// Z.AI Coding Plan API base URL; `/chat/completions` is appended to it when
/// building the request URL.
const ZAI_API_BASE: &str = "https://api.z.ai/api/coding/paas/v4";

/// Model identifier sent in the `model` field of every chat request.
const DEFAULT_MODEL: &str = "glm-5-turbo";

// ── OpenAI-compatible types ───────────────────────────────────────────────

/// Outgoing chat message as serialized into the request body.
#[derive(Debug, Serialize)]
struct SerChatMessage {
    /// Speaker role; this module sends `"system"` and `"user"`.
    role: String,
    /// Message text.
    content: String,
}

/// Request body posted to the `/chat/completions` endpoint.
#[derive(Debug, Serialize)]
struct ChatRequest {
    /// Model identifier.
    model: String,
    /// Conversation messages, in order.
    messages: Vec<SerChatMessage>,
    /// Value sent as the request's `max_tokens` field.
    max_tokens: u32,
    /// Value sent as the request's `temperature` field.
    temperature: f32,
}

/// Message object deserialized from one choice of the API response.
#[derive(Debug, Deserialize)]
struct ChatMessage {
    /// Role reported by the API; required for deserialization.
    role: String,
    /// Primary reply text; may be empty.
    // NOTE(review): a JSON `null` here would fail deserialization because the
    // field is a plain `String` — confirm the API never returns `null`.
    content: String,
    /// Optional secondary text; `None` when the field is absent from the response.
    #[serde(default)]
    reasoning_content: Option<String>,
}

impl ChatMessage {
    fn get_response(&self) -> String {
        if !self.content.is_empty() {
            self.content.clone()
        } else if let Some(ref reasoning) = self.reasoning_content {
            reasoning.trim().to_string()
        } else {
            String::new()
        }
    }
}

/// Top-level response body; only the `choices` array is read.
#[derive(Debug, Deserialize)]
struct ChatResponse {
    /// Candidate completions; the first one is used as the reply.
    choices: Vec<ChatChoice>,
}

/// A single completion candidate from the response.
#[derive(Debug, Deserialize)]
struct ChatChoice {
    /// The message carried by this choice.
    message: ChatMessage,
}

// ── Public API ────────────────────────────────────────────────────────────

/// Combined judge + response via a single LLM call.
///
/// * `agent_name` — name used in the fallback identity line and in log output.
/// * `persona` — persona text placed at the top of the system prompt (may be empty).
/// * `memory` — remembered facts included in the system prompt (may be empty).
/// * `message` — the formatted current message with speaker attribution.
/// * `history` — recent conversation lines, oldest first.
/// * `force` — `true` means this is a fallback (nobody else responded), so the
///   prompt instructs the model to answer.
///
/// Returns `Ok(None)` if the agent should stay silent (including when the
/// model returns an empty reply) and `Ok(Some(payload))` with the trimmed
/// reply text otherwise.
///
/// # Errors
///
/// Fails when neither `ZAI_API_KEY` nor `Z_AI_API_KEY` is set, when the HTTP
/// client cannot be built, when the request itself fails, when the API keeps
/// returning a non-success status (429 and 5xx are retried first), or when
/// the response body cannot be parsed.
pub async fn judge_and_respond(
    agent_name: &str,
    persona: &str,
    memory: &str,
    message: &str,
    history: &[String],
    force: bool,
) -> Result<Option<MessagePayload>> {
    // Total number of tries: the first request plus up to two retries.
    const MAX_ATTEMPTS: u64 = 3;

    let api_key = std::env::var("ZAI_API_KEY")
        .or_else(|_| std::env::var("Z_AI_API_KEY"))
        .context("ZAI_API_KEY not set")?;

    // Small random jitter in [0, 2500) ms to stagger concurrent LLM calls.
    let jitter = rand::thread_rng().gen_range(0..2500u64);
    tokio::time::sleep(Duration::from_millis(jitter)).await;

    let system = build_system_prompt(agent_name, persona, memory, force);
    let user_msg = build_user_message(message, history);

    let request = ChatRequest {
        model: DEFAULT_MODEL.to_string(),
        messages: vec![
            SerChatMessage { role: "system".into(), content: system },
            SerChatMessage { role: "user".into(), content: user_msg },
        ],
        max_tokens: 512,
        temperature: 0.8, // slightly higher for natural variation
    };

    let client = Client::builder()
        .timeout(Duration::from_millis(LLM_TIMEOUT_MS))
        .build()?;

    let url = format!("{}/chat/completions", ZAI_API_BASE);

    // Retry on transient HTTP statuses (429, 5xx) with a linearly growing
    // delay. Transport-level failures are not retried.
    let mut attempts: u64 = 0;
    let chat = loop {
        attempts += 1;
        let resp = client
            .post(&url)
            .header("Authorization", format!("Bearer {}", api_key))
            .header("Content-Type", "application/json")
            .json(&request)
            .send()
            .await;

        match resp {
            Ok(r) if r.status().is_success() => {
                break r.json::<ChatResponse>().await.context("LLM parse failed")?;
            }
            Ok(r) => {
                let status = r.status();
                let body = r.text().await.unwrap_or_default();
                let transient = status.as_u16() == 429 || status.is_server_error();
                if transient && attempts < MAX_ATTEMPTS {
                    let backoff_ms = attempts * 1000;
                    log::warn!("LLM {} (attempt {}/{}), retrying in {}ms",
                        status, attempts, MAX_ATTEMPTS, backoff_ms);
                    tokio::time::sleep(Duration::from_millis(backoff_ms)).await;
                    continue;
                }
                anyhow::bail!("LLM error {}: {}", status, body);
            }
            Err(e) => anyhow::bail!("LLM request failed: {}", e),
        }
    };

    let raw = chat
        .choices
        .first()
        .map(|c| c.message.get_response())
        .unwrap_or_default();

    // Work on the trimmed text both for the silence check and for the
    // payload, so surrounding whitespace never reaches the chat.
    let content = raw.trim();

    // An empty reply (no choices, or blank text) must not be posted as an
    // empty chat message; treat it the same as an explicit refusal.
    if content.is_empty() {
        log::info!("LLM: empty reply, staying silent ({})", agent_name);
        return Ok(None);
    }

    // Detect silence
    if is_silence(content) {
        log::info!("LLM: staying silent ({})", agent_name);
        return Ok(None);
    }

    log::info!(
        "LLM: responding ({}, {} chars)",
        agent_name,
        content.chars().count()
    );
    Ok(Some(MessagePayload {
        content: content.to_string(),
        context: vec![],
    }))
}

/// Check if the LLM chose to stay silent.
///
/// The reply is trimmed and stripped of wrapping quote / markdown characters
/// (`"`, `'`, `` ` ``, `*`) before matching, so `` `NO_RESPONSE` `` or
/// `**NO_RESPONSE**` is recognized as the sentinel. Matching is
/// case-insensitive.
///
/// Returns `true` when the reply:
/// * is empty (nothing to say),
/// * is one of the exact sentinels (`...`, `(silence)`, `(no response)`, `pass`),
/// * starts with `no_response` or `no response`, or
/// * is shorter than 80 bytes and consists of a refusal phrase
///   ("won't respond" / "should not respond" without a following "but",
///   "nothing to add", "stay silent", "i'll pass").
fn is_silence(s: &str) -> bool {
    // Refusal phrases only count in short replies; a longer message that
    // happens to contain one is assumed to be a real answer.
    const SHORT_REPLY_MAX_BYTES: usize = 80;

    let lower = s
        .trim()
        .trim_matches(|c: char| matches!(c, '"' | '\'' | '`' | '*'))
        .trim()
        .to_lowercase();

    // An empty reply carries no content, so there is nothing to post.
    if lower.is_empty() {
        return true;
    }

    // Exact or prefix matches
    if matches!(lower.as_str(), "..." | "(silence)" | "(no response)" | "pass")
        || lower.starts_with("no_response")
        || lower.starts_with("no response")
    {
        return true;
    }

    if lower.len() >= SHORT_REPLY_MAX_BYTES {
        return false;
    }

    // "I won't respond, but ..." is a real reply that merely opens with a refusal.
    let hedged = lower.contains("but");
    (!hedged && (lower.contains("won't respond") || lower.contains("should not respond")))
        || lower.contains("nothing to add")
        || lower.contains("stay silent")
        || lower.contains("i'll pass")
}

// ── Prompt Engineering ────────────────────────────────────────────────────

/// Assemble the system prompt for one agent.
///
/// Sections are appended in this order:
/// 1. identity — `persona` if non-empty, otherwise `You are {agent_name}.`;
/// 2. the agent's own Telegram username, from `TELEGRAM_BOT_USERNAME` (skipped
///    when unset or blank);
/// 3. the group member list, from the comma-separated `TEAM_MEMBERS`
///    environment variable (blank entries are skipped);
/// 4. `memory`, if non-empty;
/// 5. the fixed group-chat rules;
/// 6. when `force` is true, an extra instruction telling the model that nobody
///    else replied and that it must not answer `NO_RESPONSE`.
fn build_system_prompt(agent_name: &str, persona: &str, memory: &str, force: bool) -> String {
    let mut p = String::new();

    // ── Identity ──
    if persona.is_empty() {
        p.push_str("You are ");
        p.push_str(agent_name);
        p.push_str(".\n\n");
    } else {
        p.push_str(persona);
        p.push_str("\n\n");
    }

    // ── My Telegram username ──
    let own_username = std::env::var("TELEGRAM_BOT_USERNAME")
        .ok()
        .filter(|u| !u.trim().is_empty());
    if let Some(username) = own_username {
        p.push_str("Your Telegram username: @");
        p.push_str(&username);
        p.push('\n');
    }

    // ── Team members (comma-separated: "name1:@username1,name2:@username2,...") ──
    if let Ok(members) = std::env::var("TEAM_MEMBERS") {
        // Skip blank entries (e.g. from a trailing comma) so the list never
        // contains empty bullets.
        let entries: Vec<&str> = members
            .split(',')
            .map(str::trim)
            .filter(|entry| !entry.is_empty())
            .collect();
        if !entries.is_empty() {
            p.push_str("\nGroup members:\n");
            for entry in entries {
                p.push_str("- ");
                p.push_str(entry);
                p.push('\n');
            }
            p.push('\n');
        }
    }

    // ── Memory ──
    if !memory.is_empty() {
        p.push_str("[Things you remember]\n");
        p.push_str(memory);
        p.push_str("\n\n");
    }

    // ── Group chat rules ──
    // This is a raw string: backslashes are NOT escapes or line continuations
    // here, so the text must contain only what should be sent to the model.
    p.push_str(
r#"너는 그룹 채팅방에 있는 AI 멤버야. 사용자와 다른 AI 멤버의 메시지를 볼 수 있어.

그룹 채팅 규칙:

1. 사용자가 방에 메시지를 보내면, 너의 역할과 관련이 있는지 판단해.
   - 인사, 질문, 대화 시도 → 반드시 짧게라도 응답해. 무시하면 안 돼.
   - 너의 전문 영역과 관련된 주제 → 응답해.
   - 너의 이름을 부르거나 너에게 직접 말하면 → 응답해.

2. 응답하지 않아도 되는 경우:
   - 누군가 이미 좋은 답변을 줬을 때
   - 다른 사람들끼리 대화 중이고 네가 끼어들 이유가 없을 때
   - 단순 확인 ("ㅇㅇ", "ok", "그래") 같은 내용일 때

3. 응답할 때:
   - 1~3문장으로 간결하게
   - 상대방이 쓰는 언어로 답해
   - 자연스럽게, 사람처럼 말해
   - 네 이름을 앞에 붙이지 마

4. 응답하지 않겠다고 결정하면 정확히 이렇게만 답해: NO_RESPONSE
   다른 설명은 필요 없어.

그룹 멤버:
   - 사용자 (그룹의 인간 멤버)
   - 각 AI 멤버는 자기 이름과 역할이 있어. 대화에서 다른 멤버의 이름이 보이면 그 사람이 이 그룹에 있는 거야.

멘션 규칙:
   - 특정 멤버에게 요청할 때는 @username으로 불러. 예: "@claw_camus_bot, 이 부분 기술 검토 좀 해줘"
   - 사용자의 결정이 필요할 때도 명시적으로 물어봐.
   - @는 생략해도 되지만 username이 들어가야 상대방이 인식해.

기억: 모든 메시지에 대답하는 것도 이상하지만, 인사나 질문을 무시하는 건 더 이상해."#,
    );

    if force {
        // Fallback mode: nobody else answered, so the model is told to reply.
        p.push_str(
            "\n\n중요: 다른 멤버가 이 메시지에 응답하지 않았어. \
             네가 짧게라도 답을 해줘. 인사면 인사로, 질문이면 답변으로. \
             NO_RESPONSE는 절대 하지 마.",
        );
    }

    p
}

/// Compose the user-role message sent to the model.
///
/// When `history` is non-empty, its last ten lines (with their speaker
/// attribution) are listed under a "Recent conversation" heading. The current
/// `message` follows, then the closing question asking the model to either
/// reply or answer `NO_RESPONSE`.
fn build_user_message(message: &str, history: &[String]) -> String {
    // Cap on how many trailing history lines are shown, to keep context manageable.
    const MAX_HISTORY_LINES: usize = 10;

    let first_shown = history.len().saturating_sub(MAX_HISTORY_LINES);
    let recent = &history[first_shown..];

    let mut prompt = String::new();

    if !recent.is_empty() {
        prompt += "Recent conversation:\n";
        for entry in recent {
            prompt += entry.as_str();
            prompt += "\n";
        }
        prompt += "\n";
    }

    prompt += "Current message:\n";
    prompt += message;
    prompt += "\n\nShould you respond? If yes, write your response. If no, write: NO_RESPONSE";

    prompt
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Sentinels and short refusals are silence; ordinary replies are not.
    #[test]
    fn test_silence_detection() {
        let silent = [
            "NO_RESPONSE",
            "no_response",
            "No response",
            "...",
            "pass",
            "I'll pass",
            "I have nothing to add here",
            "(silence)",
        ];
        for reply in silent.iter() {
            assert!(is_silence(reply));
        }

        let spoken = ["그래, 알겠습니다.", "I think the best approach is..."];
        for reply in spoken.iter() {
            assert!(!is_silence(reply));
        }
    }

    /// The user message carries the history heading, the history speakers,
    /// and the current message.
    #[test]
    fn test_build_user_message() {
        let history: Vec<String> = ["[camus]: 반갑습니다", "[eleven]: 환영합니다"]
            .iter()
            .map(|line| line.to_string())
            .collect();

        let rendered = build_user_message("[사용자]: 안녕하세요", &history);

        for needle in ["Recent conversation", "[camus]", "[사용자]"].iter() {
            assert!(rendered.contains(needle));
        }
    }

    /// The rules section and the sentinel are always present.
    #[test]
    fn test_system_prompt_has_rules() {
        let prompt = build_system_prompt("eleven", "", "", false);
        for needle in ["그룹 채팅 규칙", "NO_RESPONSE"].iter() {
            assert!(prompt.contains(needle));
        }
    }

    /// The fallback instruction appears only when `force` is set.
    #[test]
    fn test_force_prompt() {
        let forced = build_system_prompt("eleven", "", "", true);
        let normal = build_system_prompt("eleven", "", "", false);
        assert!(forced.contains("다른 멤버가 이 메시지에 응답하지 않았어"));
        assert!(!normal.contains("다른 멤버가"));
    }
}