use crate::{
integrations::{
analyze_client::{ComplexityHotspot, Smell},
apex_context::ApexContextResult,
search_client::SearchResult,
},
llm::{ChatMessage, LlmRequest, ResponseSchema, strip_provider_prefix},
models::ReviewResult,
};
/// Sampling temperature placed on every request built by `build_review_prompt`.
const REVIEWER_TEMPERATURE: f32 = 0.3;
/// `max_tokens` value placed on every request built by `build_review_prompt`.
const REVIEWER_MAX_TOKENS: u32 = 4096;
/// Name given to the structured-output schema returned by `review_response_schema`.
const REVIEW_SCHEMA_NAME: &str = "review_output";
/// Builds the JSON schema describing the reviewer's structured response.
///
/// The top-level object requires `grade`, `grade_justification`, `verdict`,
/// `summary` and `findings`. Each entry of `findings` is an object in which
/// only `title` and `body` are required; `severity`, `confidence`, `file`
/// and `line` are optional, and `line` may be `null`.
///
/// The schema is named after [`REVIEW_SCHEMA_NAME`].
pub fn review_response_schema() -> ResponseSchema {
    // Shape of a single element of the `findings` array.
    let finding_item = serde_json::json!({
        "type": "object",
        "properties": {
            "title": {"type": "string"},
            "body": {"type": "string"},
            "severity": {
                "type": "string",
                "enum": ["low", "medium", "high", "critical"]
            },
            "confidence": {"type": "number", "minimum": 0.0, "maximum": 1.0},
            "file": {"type": "string"},
            "line": {"type": ["integer", "null"]}
        },
        "required": ["title", "body"]
    });

    // Top-level response fields.
    let properties = serde_json::json!({
        "grade": {
            "type": "string",
            "enum": ["A+", "A", "A-", "B+", "B", "B-", "C+", "C", "C-", "D+", "D", "D-", "F"],
            "description": "Letter grade for overall PR quality (A+ = best, F = worst)"
        },
        "grade_justification": {
            "type": "string",
            "description": "One-line justification for the assigned grade"
        },
        "verdict": {
            "type": "string",
            "enum": ["APPROVE", "APPROVE*", "REQUEST_CHANGES", "BLOCK", "UNKNOWN"],
            "description": "Review verdict — one of the five board grades"
        },
        "summary": {
            "type": "string",
            "description": "One-line summary of the review"
        },
        "findings": {
            "type": "array",
            "items": finding_item
        }
    });

    let schema = serde_json::json!({
        "type": "object",
        "properties": properties,
        "required": ["grade", "grade_justification", "verdict", "summary", "findings"]
    });

    ResponseSchema {
        name: String::from(REVIEW_SCHEMA_NAME),
        schema,
    }
}
/// Supporting material rendered into the user message after the diff.
///
/// `build_user_message` omits the section for any field that is empty, so a
/// default-constructed value adds nothing to the prompt.
#[derive(Debug, Default)]
pub struct ReviewContext {
/// Search hits shown under "Related code (from trusty-search)"; only the
/// first 10 are rendered.
pub search_results: Vec<SearchResult>,
/// Entries shown under "Complexity hotspots (from trusty-analyze)"; only the
/// first 5 are rendered.
pub complexity_hotspots: Vec<ComplexityHotspot>,
/// Entries shown under "Code smells (from trusty-analyze)"; only the first
/// 10 are rendered.
pub smells: Vec<Smell>,
/// Entries shown under "Related APEX product specs"; capped at
/// `crate::config::constants::MAX_APEX_RESULTS`.
pub apex_results: Vec<ApexContextResult>,
}
/// Returns the fixed system prompt given to the reviewer model.
///
/// The text covers, in order: the 13-step letter-grade scale, the verdict
/// table and its consistency rules, the compile-break rule, severity anchors
/// for findings, what to focus on, and the structured output fields. The
/// string is a compile-time constant; nothing is interpolated into it.
pub fn reviewer_system_prompt() -> &'static str {
    // Kept as a raw string so the embedded double quotes need no escaping.
    // Continuation lines must stay at column 0: leading whitespace would
    // become part of the prompt.
    const SYSTEM_PROMPT: &str = r#"You are a senior software engineer performing a pull-request code review.
## Letter grade (MANDATORY — assign exactly one)
Assign a letter grade on the 13-step scale: A+, A, A-, B+, B, B-, C+, C, C-, D+, D, D-, F.
| Grade band | Quality signal |
|-------------------|-------------------------------------------------------------|
| A+, A, A- | Excellent to exceptional — clean, correct, well-structured. |
| B+, B, B- | Good to solid — acceptable, minor nits only. |
| C+, C, C- | Marginal — notable issues or advisory concerns. |
| D+, D, D- | Poor — significant problems requiring changes before merge. |
| F | Failing — compile error, data corruption, security bypass. |
Provide a one-line justification in `grade_justification`.
## Verdict (MANDATORY — pick exactly one)
| Verdict | Grade band | When to use |
|-----------------|-----------------|-------------|
| BLOCK | F | Compile error introduced by this diff, data corruption, security/auth bypass. |
| REQUEST_CHANGES | D+, D, D- | Confirmed correctness bug, silent data loss, missing required migration/backfill, resource leak, unhandled exception path with real failure consequence. |
| APPROVE* | C+, C, C- | Advisory concern the author may reasonably disagree with; the code ships but you want the note on record. |
| APPROVE | B- or above | No significant concerns; the change is clean and correct. |
| UNKNOWN | — | The diff was too truncated, context-free, or otherwise insufficient to assess. |
**Keep your verdict consistent with your grade.** A grade of "D" must have verdict REQUEST_CHANGES;
a grade of "F" must have verdict BLOCK; a grade of "B-" or above must have verdict APPROVE.
- Your default verdict is APPROVE (default grade A-). You bear the burden of proof to escalate.
- APPROVE* requires at least one Medium finding. Do not emit APPROVE* with only Low findings.
- REQUEST_CHANGES requires ALL THREE: (a) a specific wrong line cited verbatim,
(b) a traceable failure path, (c) a concrete fix proposed.
- Do NOT emit UNKNOWN just because the PR is large; use it only when you
genuinely cannot tell if the change is correct.
- **Do not under-rate a clearly blocking issue as advisory.** If it would break
a build or corrupt data in production, assign severity=critical and verdict=BLOCK.
## Compile-break rule (CRITICAL)
If the diff REMOVES a symbol (enum value, method, constant, field, function
signature change) AND the same diff still shows remaining references or
call-sites to that removed symbol elsewhere in the codebase, that is a
compile-time regression. Assign the finding severity=critical and
verdict=BLOCK (grade=F). No other context softens this.
## Severity anchors for findings
Every finding MUST have a `severity` from:
- **critical** — compile error, data corruption, security bypass, auth failure.
- **high** — confirmed correctness bug, silent data loss, unhandled exception
path, missing required migration, resource leak with real consequence.
- **medium** — advisory: code smell, suboptimal pattern, minor risk, the author
may reasonably disagree.
- **low** — cosmetic, documentation gap, style preference.
## What to review
Focus on: correctness bugs, security issues, data-loss risks, logic errors.
Note but do not block on: style, minor naming, documentation gaps, test coverage.
## Output (REQUIRED — populate the structured response fields)
- `grade`: one of A+, A, A-, B+, B, B-, C+, C, C-, D+, D, D-, F.
- `grade_justification`: one-sentence reason for the grade.
- `verdict`: one of APPROVE, APPROVE*, REQUEST_CHANGES, BLOCK, UNKNOWN.
- `summary`: one sentence summary of the review.
- `findings`: array of issues found (empty array if none).
Each finding has: title, body (detailed description), severity (low/medium/high/critical),
confidence (0.0–1.0), file (source file path), line (null if not applicable).
`confidence` is a float in [0.0, 1.0].
`line` may be null if no specific line is applicable.
`findings` may be an empty array if there are no issues."#;

    SYSTEM_PROMPT
}
/// Assembles the complete LLM request for reviewing one pull request.
///
/// The request carries the fixed reviewer system prompt, a single `user`
/// message produced by `build_user_message` from the PR metadata, diff and
/// context, the reviewer temperature / token limit constants, and the
/// structured-output schema from `review_response_schema`.
///
/// `reviewer_model` is passed through `strip_provider_prefix` before being
/// stored as the request's model.
pub fn build_review_prompt(
    owner: &str,
    repo: &str,
    pr_meta: &ReviewPrMeta,
    diff: &str,
    context: &ReviewContext,
    external_context: &str,
    reviewer_model: &str,
) -> LlmRequest {
    let content = build_user_message(owner, repo, pr_meta, diff, context, external_context);
    let user_turn = ChatMessage {
        role: String::from("user"),
        content,
    };

    LlmRequest {
        model: strip_provider_prefix(reviewer_model).to_string(),
        system: String::from(reviewer_system_prompt()),
        messages: vec![user_turn],
        temperature: REVIEWER_TEMPERATURE,
        max_tokens: REVIEWER_MAX_TOKENS,
        response_schema: Some(review_response_schema()),
    }
}
/// Pull-request metadata used to build the header of the review prompt.
///
/// Empty strings mean "not available"; `build_user_message` skips the
/// corresponding header part for an empty `title`, `author` or `url`.
#[derive(Debug, Default, Clone)]
pub struct ReviewPrMeta {
/// PR title; appended to the `## PR: owner/repo` header line when non-empty.
pub title: String,
/// Presumably the PR description text.
/// NOTE(review): `build_user_message` never renders this field — confirm
/// whether that is intentional.
pub body: String,
/// Author handle; rendered as `Author: @<author>` when non-empty.
pub author: String,
/// PR URL; rendered as `URL: <url>` when non-empty.
pub url: String,
}
impl ReviewPrMeta {
    /// Derives prompt metadata from a stored [`ReviewResult`].
    ///
    /// Only `pr_title` and `pr_url` are copied over; `body` and `author`
    /// are left as empty strings.
    pub fn from_result(result: &ReviewResult) -> Self {
        Self {
            title: result.pr_title.clone(),
            url: result.pr_url.clone(),
            // Remaining fields (`body`, `author`) take their empty defaults.
            ..Self::default()
        }
    }
}
fn build_user_message(
owner: &str,
repo: &str,
pr_meta: &ReviewPrMeta,
diff: &str,
context: &ReviewContext,
external_context: &str,
) -> String {
let mut msg = String::with_capacity(diff.len() + 2048);
msg.push_str(&format!("## PR: {owner}/{repo}"));
if !pr_meta.title.is_empty() {
msg.push_str(&format!(" — {}", pr_meta.title));
}
msg.push('\n');
if !pr_meta.author.is_empty() {
msg.push_str(&format!("Author: @{}\n", pr_meta.author));
}
if !pr_meta.url.is_empty() {
msg.push_str(&format!("URL: {}\n", pr_meta.url));
}
msg.push('\n');
msg.push_str("## Unified diff\n\n");
msg.push_str("```diff\n");
msg.push_str(diff);
if !diff.ends_with('\n') {
msg.push('\n');
}
msg.push_str("```\n\n");
if !context.search_results.is_empty() {
msg.push_str("## Related code (from trusty-search)\n\n");
for (i, result) in context.search_results.iter().enumerate().take(10) {
msg.push_str(&format!("### Context {} — {}\n", i + 1, result.file));
if let Some(ref snippet) = result.snippet {
msg.push_str("```\n");
msg.push_str(snippet);
if !snippet.ends_with('\n') {
msg.push('\n');
}
msg.push_str("```\n");
}
msg.push('\n');
}
}
if !context.complexity_hotspots.is_empty() {
msg.push_str("## Complexity hotspots (from trusty-analyze)\n\n");
for h in context.complexity_hotspots.iter().take(5) {
let fn_part = h
.function_name
.as_deref()
.map(|f| format!(" `{f}`"))
.unwrap_or_default();
msg.push_str(&format!(
"- `{}`{fn_part}: cyclomatic={}, cognitive={}\n",
h.file, h.cyclomatic, h.cognitive
));
}
msg.push('\n');
}
if !context.smells.is_empty() {
msg.push_str("## Code smells (from trusty-analyze)\n\n");
for s in context.smells.iter().take(10) {
let line_part = s.line.map(|l| format!(" (line {l})")).unwrap_or_default();
msg.push_str(&format!(
"- `{}` — {} [{}]{line_part}\n",
s.file, s.category, s.severity
));
}
msg.push('\n');
}
if !context.apex_results.is_empty() {
msg.push_str("## Related APEX product specs\n\n");
for (i, apex) in context
.apex_results
.iter()
.enumerate()
.take(crate::config::constants::MAX_APEX_RESULTS)
{
let line_suffix = apex.start_line.map(|l| format!(":{l}")).unwrap_or_default();
msg.push_str(&format!(
"### APEX {} — `{}{}`\n",
i + 1,
apex.file,
line_suffix
));
if !apex.snippet.is_empty() {
msg.push_str("```\n");
msg.push_str(&apex.snippet);
if !apex.snippet.ends_with('\n') {
msg.push('\n');
}
msg.push_str("```\n");
}
msg.push('\n');
}
msg.push_str(
"When citing an APEX spec, use the format: \
[apex: `path/to/spec.md:15` — \"brief excerpt\"]\n\n",
);
}
let external = external_context.trim();
if !external.is_empty() {
msg.push_str(external);
if !external.ends_with('\n') {
msg.push('\n');
}
msg.push('\n');
}
msg.push_str(
"Please review the diff above and populate the structured response \
fields (verdict, summary, findings) as specified in the system prompt.\n",
);
msg
}
// Unit tests for this module are kept in `prompt_tests.rs` and compiled only in test builds.
#[cfg(test)]
#[path = "prompt_tests.rs"]
mod tests;