difflore-core 0.1.0

use super::{ReviewIssueRecord, ReviewPerspective};
use crate::context::assembler::PastVerdictSection;
use crate::context::types::PastVerdict;

// Segmented prompt for prompt cache reuse.

/// A single team rule in the canonical form used when producing the
/// cacheable team-rules digest. This is deliberately minimal: the point
/// of the digest is to be deterministic / hash-stable across reviews so
/// that an upstream Anthropic `cache_control` hint can reuse the prefix.
#[derive(Debug, Clone)]
pub struct TeamRuleDigest {
    /// Rule identifier. `render_team_rules_digest` orders rules by this
    /// value and writes it on the `- id:` line of each digest entry.
    pub id: String,
    /// Rule text, written verbatim on the `content:` line of the entry.
    pub content: String,
}

/// System prompt split into a cacheable stable prefix and a per-review
/// dynamic suffix. The stable prefix is intended to be reused across
/// multiple reviews from the same team (identical perspective + rules +
/// repo context) so providers that support prompt caching (e.g. Anthropic
/// `cache_control: ephemeral`) can skip re-tokenising it.
///
/// Concatenating `stable_prefix + dynamic_suffix` yields a conventional
/// flat system prompt — `build_system_prompt` relies on this property for
/// byte-identical backward compatibility.
#[derive(Debug, Clone)]
pub struct SegmentedPrompt {
    /// Cacheable, hash-stable across reviews for the same team:
    /// base instructions → perspective addendum → sorted team rules →
    /// repo context facts. The rules and repo-context sections contribute
    /// nothing when their inputs are empty.
    pub stable_prefix: String,
    /// Per-review content: past verdicts → current diff → user instructions.
    /// Each section is omitted when its input is empty, so this may be an
    /// empty string.
    pub dynamic_suffix: String,
}

/// Hard-coded base instructions for the review system prompt. Kept as a
/// constant so the compatibility shim and `build_segmented_prompt` share the
/// exact same bytes.
///
/// The text asks the model for a raw JSON array of issue objects (fields:
/// `severity`, `rule`, `ruleId`, `message`, `file`, `line`, `existingCode`,
/// `suggestion`) and for `[]` when nothing is found. `build_segmented_prompt`
/// always writes it first into the stable prefix, so editing even one
/// character changes that prefix for every review.
const REVIEW_BASE_INSTRUCTIONS: &str = r#"You are a code review assistant. Review the provided diff against the given rules and return issues as a JSON array.

Each issue must be a JSON object with these fields:
- severity: "error" | "warning" | "info"
- rule: the rule name that was violated
- ruleId: stable rule ID when the matched rule provides one (optional, string)
- message: clear description of the issue
- file: repo-relative path of the affected file as it appears in the diff header (e.g. "src/app.ts" — strip the "a/" or "b/" prefix; REQUIRED for downstream patch generation)
- line: line number in the diff (optional, number)
- existingCode: copy the EXACT affected source line(s) verbatim from the diff, without the leading +/- marker (optional, string; helps pinpoint the precise location)
- suggestion: how to fix it (optional, string)

Matched rules are the user's review memory and should be treated as authoritative review criteria. If the diff directly matches a rule's bad pattern, contradicts a rule's recommendation, or removes code a rule says is required, report that issue even when the change is small or the code still compiles. Do not return [] when a matched rule clearly applies to the diff.

Return ONLY a JSON array. No markdown, no explanation, no code blocks. Just the raw JSON array.
If no issues are found, return an empty array: []"#;

/// Render the team-rules digest section of the stable prefix.
///
/// Rules are emitted in ascending `id` order, with `content` as a
/// tie-breaker, so the resulting string depends only on the *set* of rules
/// and never on the order the caller happened to supply them in. That
/// order-independence is what makes the stable prefix hash-stable and
/// therefore cacheable.
///
/// Returns an empty string when `rules` is empty so callers that have no
/// rules produce a prefix that is byte-identical to the flat prompt.
/// Otherwise the output is a `## Team Rules Digest` heading followed by one
/// `- id: … / content: …` entry per rule.
pub(super) fn render_team_rules_digest(rules: &[TeamRuleDigest]) -> String {
    if rules.is_empty() {
        return String::new();
    }

    let mut sorted: Vec<&TeamRuleDigest> = rules.iter().collect();
    // Comparing on `id` alone would leave rules that share an id in caller
    // order, leaking input ordering into the digest. With `content` as the
    // secondary key, any two rules that compare equal render identically,
    // so an unstable sort is safe here.
    sorted.sort_unstable_by(|a, b| {
        a.id.cmp(&b.id)
            .then_with(|| a.content.cmp(&b.content))
    });

    let mut s = String::new();
    s.push_str("\n\n## Team Rules Digest\n");
    for r in sorted {
        s.push_str("\n- id: ");
        s.push_str(&r.id);
        s.push('\n');
        s.push_str("  content: ");
        s.push_str(&r.content);
        s.push('\n');
    }
    s
}

/// Produce the `## Repo Context` section of the stable prefix.
///
/// `None` and `Some("")` both yield an empty string, which keeps the
/// reassembled flat prompt byte-identical when no facts are available.
/// Any other value is appended verbatim beneath the section heading.
pub(super) fn render_repo_context_section(repo_context_facts: Option<&str>) -> String {
    repo_context_facts
        .filter(|facts| !facts.is_empty())
        .map(|facts| ["\n\n## Repo Context\n", facts].concat())
        .unwrap_or_default()
}

/// Assemble the per-review dynamic suffix.
///
/// Sections are written in a fixed order, each only when it has content:
///
/// 1. past verdicts — rendered through [`PastVerdictSection`], with trailing
///    whitespace trimmed; skipped when `past_verdicts` is `None`, empty, or
///    renders to an empty string,
/// 2. `## Current Diff` — `diff` wrapped in a ```` ```diff ```` fence,
/// 3. `## User Instructions` — `user_instructions` verbatim.
///
/// When every input is empty the result is an empty string, which lets the
/// compatibility shim reassemble byte-identical output.
pub(super) fn render_dynamic_suffix(
    diff: &str,
    user_instructions: &str,
    past_verdicts: Option<&[PastVerdict]>,
) -> String {
    let mut out = String::new();

    // Prior decisions go first so the model reads them before the diff.
    if let Some(verdicts) = past_verdicts.filter(|v| !v.is_empty()) {
        let rendered = PastVerdictSection::new(verdicts.to_vec()).render();
        if !rendered.is_empty() {
            out.push_str("\n\n");
            out.push_str(rendered.trim_end());
        }
    }

    if !diff.is_empty() {
        out.push_str("\n\n## Current Diff\n```diff\n");
        out.push_str(diff);
        out.push_str("\n```");
    }

    if !user_instructions.is_empty() {
        out.push_str("\n\n## User Instructions\n");
        out.push_str(user_instructions);
    }

    out
}

/// Assemble a [`SegmentedPrompt`]: a cacheable stable prefix plus a
/// per-review dynamic suffix.
///
/// Layout, top → bottom:
/// * `stable_prefix`: base instructions → perspective addendum (only when
///   `perspective` is `Some`) → team rules digest → repo context facts.
/// * `dynamic_suffix`: past verdicts → current diff → user instructions.
///
/// Joining the two halves gives the same flat prompt that
/// `build_system_prompt` reassembles for compatibility.
pub fn build_segmented_prompt(
    perspective: Option<ReviewPerspective>,
    team_rules: &[TeamRuleDigest],
    diff: &str,
    user_instructions: &str,
    repo_context_facts: Option<&str>,
    past_verdicts: Option<&[PastVerdict]>,
) -> SegmentedPrompt {
    // Render the optional stable sections up front so the assembly below
    // reads as a plain ordered list.
    let rules_section = render_team_rules_digest(team_rules);
    let repo_section = render_repo_context_section(repo_context_facts);

    let mut stable_prefix = String::with_capacity(REVIEW_BASE_INSTRUCTIONS.len() + 1024);

    // Base instructions are hardcoded and perspective-agnostic.
    stable_prefix.push_str(REVIEW_BASE_INSTRUCTIONS);
    if let Some(p) = perspective {
        stable_prefix.push_str(p.system_prompt_addendum());
    }
    for section in [&rules_section, &repo_section] {
        stable_prefix.push_str(section);
    }

    SegmentedPrompt {
        stable_prefix,
        dynamic_suffix: render_dynamic_suffix(diff, user_instructions, past_verdicts),
    }
}

/// Flat system prompt for a review check (test-only compatibility shim).
///
/// Calls `build_segmented_prompt` with no team rules, an empty diff, empty
/// user instructions, no repo context and no past verdicts, then glues the
/// stable prefix and dynamic suffix back together. With `perspective` set to
/// `Some`, the perspective addendum follows the base instructions; with
/// `None`, only the base instructions are produced.
#[cfg(test)]
pub(super) fn build_system_prompt(perspective: Option<ReviewPerspective>) -> String {
    let SegmentedPrompt {
        stable_prefix: mut flat,
        dynamic_suffix,
    } = build_segmented_prompt(perspective, &[], "", "", None, None);
    flat.push_str(&dynamic_suffix);
    flat
}

/// Compose the user prompt for a review request.
///
/// Sections, in order:
/// * `## Review Rules` — only when `rules_text` is `Some`; two fixed
///   guidance paragraphs followed by the rules text.
/// * `## File: <path>` — only when `file_path` is `Some`.
/// * `## Diff to Review` — always present; `diff` inside a ```` ```diff ````
///   fence.
pub(super) fn build_user_prompt(
    diff: &str,
    rules_text: Option<&str>,
    file_path: Option<&str>,
) -> String {
    // Gather the pieces as borrowed slices and join once at the end.
    let mut pieces: Vec<&str> = Vec::with_capacity(11);

    if let Some(rules) = rules_text {
        pieces.extend([
            "## Review Rules\n\n",
            "Each matched rule may include a `Rule ID:` line. When you cite a matched rule, copy that exact value into `ruleId`.\n\n",
            "Use these rules as concrete checks against the diff. Prefer one precise issue over [] when a rule directly applies.\n\n",
            rules,
            "\n\n",
        ]);
    }

    if let Some(path) = file_path {
        pieces.extend(["## File: ", path, "\n\n"]);
    }

    pieces.extend(["## Diff to Review\n\n```diff\n", diff, "\n```\n"]);

    pieces.concat()
}

/// System prompt used by the self-check verification pass. Short and
/// strict so the cheap model doesn't hallucinate new issues.
///
/// The model is asked to answer with a raw JSON array holding one
/// `{"id", "confidence", "verdict", "reason"}` object per candidate issue,
/// where `verdict` is `"keep"` or `"drop"`. The `id` values correspond to the
/// indices that `build_verify_user_prompt` assigns to the candidates.
pub(super) const VERIFY_SYSTEM_PROMPT: &str = r#"You are a strict code-review verifier. Given the diff and a list of candidate issues, for EACH issue decide whether it is a true positive.

Return ONLY a JSON array. Each element must be an object:
{"id": <index>, "confidence": <float 0..1>, "verdict": "keep"|"drop", "reason": "<short>"}

Be strict — drop obvious false positives. Keep an issue when the changed line directly matches the cited rule's bad pattern or contradicts the cited rule's recommendation, even if surrounding pre-existing code has similar style. Do NOT invent new issues.
Return the raw JSON array only, no markdown, no explanation."#;

/// Build the verification user-prompt: the diff (trimmed) + the
/// candidate issues enumerated with stable `id` indices so the model's
/// response can be matched back deterministically.
///
/// * `diff` — truncated to at most 8 000 bytes. The cut backs off to the
///   nearest UTF-8 character boundary, so a multi-byte character straddling
///   the limit is dropped whole rather than causing a slicing panic.
/// * `issues` — each entry is listed with its position in the slice as `id`;
///   missing `file`, `line` and `suggestion` values are rendered as empty.
///
/// Returns the prompt text: a fenced `## Diff` section followed by a
/// `## Candidate issues` list.
pub(super) fn build_verify_user_prompt(diff: &str, issues: &[ReviewIssueRecord]) -> String {
    const DIFF_LIMIT: usize = 8_000;
    let trimmed = if diff.len() > DIFF_LIMIT {
        // Slicing a `str` in the middle of a multi-byte character panics, so
        // walk back to the closest boundary at or below the limit. Index 0 is
        // always a boundary, which guarantees termination.
        let mut end = DIFF_LIMIT;
        while !diff.is_char_boundary(end) {
            end -= 1;
        }
        &diff[..end]
    } else {
        diff
    };

    let mut s = String::new();
    s.push_str("## Diff\n```diff\n");
    s.push_str(trimmed);
    s.push_str("\n```\n\n## Candidate issues\n");
    for (i, issue) in issues.iter().enumerate() {
        s.push_str(&format!(
            "- id: {}\n  severity: {}\n  rule: {}\n  file: {}\n  line: {}\n  message: {}\n  suggestion: {}\n",
            i,
            issue.severity,
            issue.rule,
            issue.file.as_deref().unwrap_or(""),
            issue.line.map(|n| n.to_string()).unwrap_or_default(),
            issue.message,
            issue.suggestion.as_deref().unwrap_or(""),
        ));
    }
    s
}

/// System prompt for the PR-summary pass. Asks the model for a raw JSON
/// object with a one-sentence `oneLineSummary` and a `walkthroughByFile`
/// array of `{"file", "intent"}` entries, with no markdown or commentary.
pub(super) const SUMMARY_SYSTEM_PROMPT: &str = r#"You are a code-review summarizer. Given a diff, produce a concise one-line PR summary plus per-file intent descriptions.

Return ONLY a JSON object with this exact shape:
{
  "oneLineSummary": "<one sentence>",
  "walkthroughByFile": [
    {"file": "<path>", "intent": "<one sentence describing what this file's change does>"}
  ]
}
No markdown, no code blocks, no extra commentary."#;

/// Build the user prompt for the summary pass.
///
/// * `diff` — truncated to at most 8 000 bytes. The cut backs off to the
///   nearest UTF-8 character boundary, so a multi-byte character straddling
///   the limit is dropped whole rather than causing a slicing panic.
/// * `files` — paths listed one per line under `## Files touched`.
///
/// Returns the prompt text: the file list followed by a fenced `## Diff`
/// section.
pub(super) fn build_summary_user_prompt(diff: &str, files: &[String]) -> String {
    const DIFF_LIMIT: usize = 8_000;
    let trimmed = if diff.len() > DIFF_LIMIT {
        // Slicing a `str` in the middle of a multi-byte character panics, so
        // walk back to the closest boundary at or below the limit. Index 0 is
        // always a boundary, which guarantees termination.
        let mut end = DIFF_LIMIT;
        while !diff.is_char_boundary(end) {
            end -= 1;
        }
        &diff[..end]
    } else {
        diff
    };

    let mut s = String::new();
    s.push_str("## Files touched\n");
    for f in files {
        s.push_str("- ");
        s.push_str(f);
        s.push('\n');
    }
    s.push_str("\n## Diff\n```diff\n");
    s.push_str(trimmed);
    s.push_str("\n```\n");
    s
}