//! nitpicker 0.3.2
//!
//! Multi-reviewer code review using LLMs with parallel agents and debate mode.
use crate::llm::{Completion, CompletionResponse, LLMClientDyn, TokenUsage};
use eyre::Result;
use rig::completion::message::{ToolChoice, ToolResultContent, UserContent};
use rig::completion::{AssistantContent, Message};
use std::sync::Arc;
use tracing::{info, warn};

// Hard cap on tokens the model may spend producing a compaction summary.
const COMPACTION_MAX_OUTPUT_TOKENS: u64 = 8_192;
// Number of follow-up correction attempts made when a reply lacks the
// required <summary> block before giving up.
const COMPACTION_MAX_CORRECTIONS: usize = 2;
// Message seeded into the rebuilt history (and used as the next prompt)
// to nudge the agent onward after compaction.
const CONTINUE_MESSAGE: &str = "Continue from where you left off.";
// Tag name wrapping the summary payload in the model's response.
const SUMMARY_TAG: &str = "summary";
// Instructions appended to every compaction prompt; they pin the exact
// markdown structure expected inside the <summary>...</summary> tags.
const COMPACTION_SUMMARY_INSTRUCTIONS: &str = "Focus strictly on information vital for code analysis. Omit conversational filler, raw search/tool outputs, and large blocks of code.\n\
When constructing the summary, you MUST use the following exact markdown structure inside the tags:\n\
<summary>\n\
## Review Goal\n\
[1-2 sentences on the core objective of this code exploration/review. What are we looking for?]\n\
## Key Findings & Discoveries\n\
- [List major architectural insights, design patterns discovered, or critical issues/bugs identified so far.]\n\
- [Keep these concise but highly technical.]\n\
## Codebase Map (Relevant Files)\n\
- [List the critical files and directories that have been identified as relevant to the goal.]\n\
- [Briefly note why they are relevant (e.g., `src/auth/token.rs`: handles JWT validation and is where the bug likely resides).]\n\
## Explored Territory\n\
- [Briefly list what areas, files, or concepts have already been thoroughly investigated so we do not repeat work.]\n\
## Last Action & Immediate Context\n\
- [Describe the most recent tool calls and their results. What was the agent trying to find or verify? What did it just learn?]\n\
## Open Questions & Next Steps\n\
- [List any unresolved anomalies, pending review items, constraints to remember, or specific files that still need to be examined.]\n\
</summary>\n\
Return EXACTLY ONE tagged block starting with <summary> and ending with </summary>. Do not include any text, pleasantries, or explanations outside of these tags.";
// Correction sent when the normal (in-context) compaction reply had no
// <summary> block at all.
const MISSING_SUMMARY_CORRECTION: &str = "Your response is missing the required <summary>...</summary> block. Reply with ONLY the tagged summary block and nothing else outside the tags.";
// Correction sent on the rendered-transcript fallback path, where the whole
// reply should be the tagged block.
const WRAP_SUMMARY_CORRECTION: &str = "Wrap your entire response in <summary>...</summary> tags and output nothing outside them.";

/// Result of a successful history compaction.
pub struct CompactionOutcome {
    /// Tokens spent producing the summary itself.
    pub usage: TokenUsage,
    /// Extracted `<summary>` content that replaces the old history.
    pub summary: String,
    /// Context usage at the moment compaction was triggered.
    pub trigger_usage: TokenUsage,
}

/// Source material handed to the compaction model: either the live message
/// thread (primary path) or a pre-rendered plain-text transcript (fallback
/// used when in-context compaction fails).
enum CompactionInput<'a> {
    Messages(&'a [Message]),
    Transcript(&'a str),
}

/// Which correction prompt to send when the model omits the `<summary>`
/// block; selected from the `CompactionInput` variant in use.
enum CorrectionMode {
    MissingSummary,
    WrapSummary,
}

/// Replaces an oversized conversation with a compact summary so the agent can
/// keep working within the model's context window.
///
/// On success, `history` is rewritten to a two-message seed (the tagged
/// summary plus a "continue" nudge) and `prompt` is reset to that same
/// continue message. Returns `Ok(None)` when `history` is empty, i.e. there
/// is nothing to compact.
pub async fn compact_history(
    client: Arc<dyn LLMClientDyn>,
    model: &str,
    system_prompt: &str,
    history: &mut Vec<Message>,
    prompt: &mut Message,
    turn: usize,
    usage: TokenUsage,
) -> Result<Option<CompactionOutcome>> {
    if history.is_empty() {
        return Ok(None);
    }

    let outcome = summarize_history(client, model, system_prompt, history, turn, usage).await?;

    let summary_msg = Message::user(format!(
        "compacted conversation summary before turn {turn}:\n\n{}",
        outcome.summary
    ));
    let continue_msg = Message::user(CONTINUE_MESSAGE.to_string());
    *prompt = continue_msg.clone();
    *history = vec![summary_msg, continue_msg];

    info!(
        turn,
        trigger_input_tokens = outcome.trigger_usage.input_tokens,
        trigger_output_tokens = outcome.trigger_usage.output_tokens,
        trigger_total_tokens = outcome.trigger_usage.total_tokens,
        input_tokens = outcome.usage.input_tokens,
        output_tokens = outcome.usage.output_tokens,
        total_tokens = outcome.usage.total_tokens,
        "compaction complete"
    );
    Ok(Some(outcome))
}

/// Runs compaction against the live message thread, falling back to a
/// rendered plain-text transcript when the in-context attempt fails.
async fn summarize_history(
    client: Arc<dyn LLMClientDyn>,
    model: &str,
    system_prompt: &str,
    thread: &[Message],
    turn: usize,
    usage: TokenUsage,
) -> Result<CompactionOutcome> {
    let first_attempt = run_compaction(
        &client,
        model,
        system_prompt,
        CompactionInput::Messages(thread),
        turn,
        usage,
    )
    .await;

    let err = match first_attempt {
        Ok(outcome) => return Ok(outcome),
        Err(err) => err,
    };

    // Fallback: flatten the thread to text and let the model summarize that.
    warn!("in-context compaction failed ({err}), retrying with rendered transcript");
    let rendered = render_history_as_text(thread);
    let outcome = run_compaction(
        &client,
        model,
        system_prompt,
        CompactionInput::Transcript(&rendered),
        turn,
        usage,
    )
    .await?;
    info!("rendered transcript compaction succeeded");
    Ok(outcome)
}

/// Performs one compaction request and, if the reply lacks the `<summary>`
/// block, hands off to the correction loop.
async fn run_compaction(
    client: &Arc<dyn LLMClientDyn>,
    model: &str,
    system_prompt: &str,
    input: CompactionInput<'_>,
    turn: usize,
    usage: TokenUsage,
) -> Result<CompactionOutcome> {
    let prompt = build_compaction_prompt(&input, turn, usage);
    let request = compaction_completion(
        model,
        system_prompt,
        history_for_initial_request(&input),
        Message::user(prompt.clone()),
    );
    let response = client.completion(request).await?;

    if let Some(summary) = extract_summary(&response) {
        return Ok(CompactionOutcome {
            usage: response.usage,
            summary,
            trigger_usage: usage,
        });
    }

    // No tagged block in the reply: try to coax one out with corrections.
    let mode = correction_mode(&input);
    run_corrections(
        client,
        model,
        system_prompt,
        &input,
        usage,
        prompt,
        response,
        mode,
    )
    .await
}

/// Re-prompts the model up to `COMPACTION_MAX_CORRECTIONS` times when its
/// reply lacks the required `<summary>` block.
///
/// After each failed attempt, the working history is extended with the
/// correction prompt that was sent and then the assistant's reply, so the
/// next request sees an accurate, alternating user/assistant record.
/// (Previously the reply was recorded before the correction, which both
/// misordered the transcript — two consecutive assistant messages — and made
/// the correction appear twice in a row on the following request: once at the
/// end of history and once as the new prompt.)
///
/// Returns the successful outcome, or an error (worded per `correction_mode`)
/// carrying the last non-conforming response text.
async fn run_corrections(
    client: &Arc<dyn LLMClientDyn>,
    model: &str,
    system_prompt: &str,
    input: &CompactionInput<'_>,
    trigger_usage: TokenUsage,
    original_prompt: String,
    initial_response: CompletionResponse,
    correction_mode: CorrectionMode,
) -> Result<CompactionOutcome> {
    let correction = correction_prompt(&correction_mode);
    let mut history = correction_history(input, original_prompt, initial_response.message());
    let mut previous = initial_response;

    for _ in 0..COMPACTION_MAX_CORRECTIONS {
        let next = client
            .completion(compaction_completion(
                model,
                system_prompt,
                history.clone(),
                Message::user(correction.to_string()),
            ))
            .await?;
        match extract_summary(&next) {
            Some(summary) => {
                return Ok(CompactionOutcome {
                    usage: next.usage,
                    summary,
                    trigger_usage,
                });
            }
            None => {
                // Record the correction prompt before the reply it elicited so
                // the history stays in send order and the correction is not
                // repeated back-to-back on the next request.
                history.push(Message::user(correction.to_string()));
                history.push(next.message());
                previous = next;
            }
        }
    }

    match correction_mode {
        CorrectionMode::MissingSummary => Err(eyre::eyre!(
            "compaction output missing <summary> block after {COMPACTION_MAX_CORRECTIONS} corrections: {}",
            previous.text()
        )),
        CorrectionMode::WrapSummary => Err(eyre::eyre!(
            "rendered compaction also failed to produce <summary> block: {}",
            previous.text()
        )),
    }
}

/// Builds the LLM request used for every compaction call: no tools, tool
/// choice forced to `None`, a fixed output-token budget, and the caller's
/// system prompt as the preamble.
fn compaction_completion(
    model: &str,
    system_prompt: &str,
    history: Vec<Message>,
    prompt: Message,
) -> Completion {
    let preamble = Some(system_prompt.to_string());
    Completion {
        prompt,
        history,
        preamble,
        model: model.to_owned(),
        tools: Vec::new(),
        tool_choice: Some(ToolChoice::None),
        additional_params: None,
        max_tokens: Some(COMPACTION_MAX_OUTPUT_TOKENS),
    }
}

/// Message history for the first compaction request: the full thread when
/// compacting in-context, empty when the transcript is embedded in the prompt.
fn history_for_initial_request(input: &CompactionInput<'_>) -> Vec<Message> {
    if let CompactionInput::Messages(thread) = input {
        thread.to_vec()
    } else {
        Vec::new()
    }
}

/// History for the first correction request: the initial request's history
/// plus the original prompt and the assistant's (non-conforming) reply.
fn correction_history(
    input: &CompactionInput<'_>,
    original_prompt: String,
    assistant_message: Message,
) -> Vec<Message> {
    let mut messages = history_for_initial_request(input);
    messages.extend([Message::user(original_prompt), assistant_message]);
    messages
}

/// Picks the correction wording for the given input: transcript fallback asks
/// the model to wrap its whole reply; the in-context path asks for the
/// missing tagged block.
fn correction_mode(input: &CompactionInput<'_>) -> CorrectionMode {
    if matches!(input, CompactionInput::Transcript(_)) {
        CorrectionMode::WrapSummary
    } else {
        CorrectionMode::MissingSummary
    }
}

/// Maps a correction mode to its canned correction message.
fn correction_prompt(mode: &CorrectionMode) -> &'static str {
    if let CorrectionMode::WrapSummary = mode {
        WRAP_SUMMARY_CORRECTION
    } else {
        MISSING_SUMMARY_CORRECTION
    }
}

/// Assembles the full compaction prompt: an input-specific intro, the shared
/// summary-format instructions, and a footer recording the turn number and
/// the token usage that triggered compaction.
fn build_compaction_prompt(input: &CompactionInput<'_>, turn: usize, usage: TokenUsage) -> String {
    let intro = match input {
        CompactionInput::Transcript(transcript) => format!(
            "Summarize the following code exploration and review session transcript so the agent can continue seamlessly from where it left off.\n\n<transcript>\n{transcript}\n</transcript>"
        ),
        CompactionInput::Messages(_) => String::from(
            "This thread is getting out of hand and needs to be compacted. Stop previous work and summarize the conversation so far.",
        ),
    };

    let footer = format!(
        "This compaction is happening before turn {turn}. Context size at compaction: {} input, {} output, {} total tokens.",
        usage.input_tokens, usage.output_tokens, usage.total_tokens
    );
    format!("{intro}\n\n{COMPACTION_SUMMARY_INSTRUCTIONS}\n{footer}")
}

/// Pulls the trimmed `<summary>...</summary>` payload out of a completion's
/// text, if one is present and non-empty.
fn extract_summary(response: &CompletionResponse) -> Option<String> {
    let text = response.text();
    extract_tag(&text, SUMMARY_TAG)
}

/// Flattens a rig message history into a plain-text transcript.
///
/// Kept: non-empty user text, tool results (with their call id), non-empty
/// assistant text, and tool-call records. Dropped: system messages and any
/// non-text payloads. Entries are separated by blank lines.
fn render_history_as_text(history: &[Message]) -> String {
    let mut entries: Vec<String> = Vec::new();
    for message in history {
        match message {
            Message::User { content } => {
                for piece in content.iter() {
                    if let UserContent::Text(text) = piece {
                        if !text.text.is_empty() {
                            entries.push(format!("[user]: {}", text.text));
                        }
                    } else if let UserContent::ToolResult(tool_result) = piece {
                        // Concatenate only the textual chunks of the result.
                        let mut rendered = String::new();
                        for chunk in tool_result.content.iter() {
                            if let ToolResultContent::Text(text) = chunk {
                                rendered.push_str(&text.text);
                            }
                        }
                        entries.push(format!("[tool result {}]: {}", tool_result.id, rendered));
                    }
                }
            }
            Message::Assistant { content, .. } => {
                for piece in content.iter() {
                    if let AssistantContent::ToolCall(call) = piece {
                        entries.push(format!(
                            "[called: {}({})]",
                            call.function.name, call.function.arguments
                        ));
                    } else if let AssistantContent::Text(text) = piece {
                        if !text.text().is_empty() {
                            entries.push(format!("[assistant]: {}", text.text()));
                        }
                    }
                }
            }
            Message::System { .. } => {}
        }
    }
    entries.join("\n\n")
}

/// Extracts the trimmed contents of the first `<tag>...</tag>` pair in `text`.
///
/// Returns `None` when the opening tag is missing, when no closing tag
/// follows it, or when the enclosed content is empty/whitespace-only. Only
/// the first opening tag and the first closing tag after it are considered.
fn extract_tag(text: &str, tag: &str) -> Option<String> {
    let start_tag = format!("<{tag}>");
    let end_tag = format!("</{tag}>");
    let start = text.find(&start_tag)? + start_tag.len();
    // The closing tag is searched for only after the opening tag, so `end`
    // can never precede `start`; the previous `end < start` guard was
    // unreachable and has been removed.
    let end = start + text[start..].find(&end_tag)?;
    let content = text[start..end].trim();
    if content.is_empty() {
        None
    } else {
        Some(content.to_string())
    }
}