difflore_core/review/pipeline/
mod.rs

1use super::parse::{parse_issues, severity_rank};
2use super::prompts::{build_segmented_prompt, build_user_prompt};
3use super::{
4    HttpReviewLlm, ReviewCheckInput, ReviewCheckResult, ReviewIssueRecord, ReviewLlm,
5    ReviewPerspective, ReviewStats,
6};
7use crate::review_trajectory::{RuleSource, TrajectoryBuilder, TrajectoryStep};
8use gate4agent::CliTool;
9
10mod chat;
11mod judge;
12mod resolver;
13mod rules;
14mod validate;
15
16pub(super) use chat::resolve_review_engine;
17#[cfg(test)]
18pub(super) use validate::{run_review_summary, verify_pass};
19
20use chat::{
21    PerspectiveRun, call_review_engine, get_active_provider, make_review_llm, run_one_perspective,
22};
23use rules::{build_recalled_verdicts, recall_past_verdicts_for_review};
24use validate::{
25    run_review_summary as run_review_summary_internal, verify_pass as verify_pass_internal,
26};
27
28pub(super) fn repo_scopes_for_input(input: &ReviewCheckInput) -> Vec<String> {
29    let mut scopes = Vec::new();
30    if let Some(repo) = input.repo_full_name.as_deref() {
31        let repo = repo.trim();
32        if !repo.is_empty() {
33            scopes.push(repo.to_owned());
34        }
35    }
36    for repo in &input.repo_full_name_aliases {
37        let repo = repo.trim();
38        if repo.is_empty() {
39            continue;
40        }
41        if !scopes
42            .iter()
43            .any(|existing| existing.eq_ignore_ascii_case(repo))
44        {
45            scopes.push(repo.to_owned());
46        }
47    }
48    scopes
49}
50
/// Candidate rule pool size requested at REVIEW time when the applicability
/// judge is enabled. Review is latency-tolerant (unlike the 800ms commit
/// hook), so it pulls a deeper pool than the production default
/// ([`crate::context::DEFAULT_TOP_K_RULES`]) and lets the judge filter it
/// down to the rules that actually apply. The assembler's `rule_token_budget`
/// still bounds what reaches the prompt, so this only deepens the judge's
/// candidate set, not the final injected-rule volume.
///
/// `prepare_review_rules` passes this as the top-k override only when
/// `rule_applicability_judge` is set; otherwise no override is sent.
const JUDGE_CANDIDATE_POOL_TOP_K: usize = 18;
59
/// Outcome of preparing the review's matched-rule context: the rendered
/// rules text plus the parallel id/title/count bookkeeping the rest of the
/// pipeline (attribution, trajectory, result) consumes.
struct PreparedReviewRules {
    /// Rules blob handed to the prompt builder; `None` when no rules are
    /// available (empty project id, failed context preparation, or an empty
    /// pool after the judge ran).
    rules_text: Option<String>,
    /// Number of matched rules, saturated at `i32::MAX`.
    count: i32,
    /// `source_id` of each matched rule, in pool order.
    ids: Vec<String>,
    /// Display title of each matched rule, parallel to `ids`; falls back to
    /// the rule's `source_id` when its title is missing or blank.
    titles: Vec<String>,
}
69
70/// Join a pool of rule items into the `rules_text` blob the review prompt
71/// expects (one rule's `content` per section, blank-line separated) — the
72/// same shape `intent_filter::maybe_rerank_for_review` produces. Used to
73/// rebuild the text after the applicability judge drops rules from the pool.
74fn rules_text_from_items(
75    items: &[crate::context::types::ContextSourceItemRecord],
76) -> Option<String> {
77    if items.is_empty() {
78        return None;
79    }
80    Some(
81        items
82            .iter()
83            .map(|item| item.content.clone())
84            .collect::<Vec<_>>()
85            .join("\n\n"),
86    )
87}
88
89/// Shared matched-rule preparation for both the single-pass and
90/// multi-perspective review paths.
91///
92/// Steps: (1) retrieve the candidate rule pool via the context orchestrator —
93/// deepened to [`JUDGE_CANDIDATE_POOL_TOP_K`] when the applicability judge is
94/// enabled; (2) apply the existing intent rerank; (3) when the judge is
95/// enabled, ask the review LLM which recalled rules actually apply to this
96/// diff and drop the rest, rebuilding `rules_text`/ids/titles from the
97/// survivors.
98///
99/// With the judge flag OFF this is behaviourally identical to the inline
100/// logic it replaced: same `DEFAULT_TOP_K_RULES` retrieval, same rerank, and
101/// the original `rules_text` is returned untouched (no rebuild), preserving
102/// byte-identical prompts.
103async fn prepare_review_rules(
104    db: &sqlx::SqlitePool,
105    input: &ReviewCheckInput,
106    retrieval_query: &str,
107    repo_scopes: &[String],
108    judge_llm: &dyn ReviewLlm,
109    review_engine: &crate::models::ReviewEngineRecord,
110    log_tag: &str,
111) -> PreparedReviewRules {
112    if input.project_id.is_empty() {
113        return PreparedReviewRules {
114            rules_text: None,
115            count: 0,
116            ids: Vec::new(),
117            titles: Vec::new(),
118        };
119    }
120
121    let judge_enabled = review_engine.rule_applicability_judge;
122    // Deepen the candidate pool only when the judge will filter it back down,
123    // so the flag-off path keeps the exact production retrieval depth.
124    let top_k_override = judge_enabled.then_some(JUDGE_CANDIDATE_POOL_TOP_K);
125
126    let pack = match crate::context::orchestrator::prepare_with_hint_and_repo_scopes_with_top_k(
127        db,
128        &input.project_id,
129        input.engine.as_deref().unwrap_or("claude"),
130        retrieval_query,
131        Some("review"),
132        input.file_path.as_deref(),
133        repo_scopes,
134        top_k_override,
135    )
136    .await
137    {
138        Ok(pack) => pack,
139        Err(e) => {
140            eprintln!("[{log_tag}] context prepare failed: {e:?}, proceeding without rules");
141            return PreparedReviewRules {
142                rules_text: None,
143                count: 0,
144                ids: Vec::new(),
145                titles: Vec::new(),
146            };
147        }
148    };
149
150    let reranked =
151        crate::context::intent_filter::maybe_rerank_for_review(&pack.rule_context, retrieval_query);
152
153    // Flag-OFF fast path: reproduce the original inline bookkeeping exactly
154    // (rerank count from the reranked len, else the assembler's
155    // `metadata.rule_count`; ids/titles from whichever item set), so the
156    // default review path stays byte-for-byte identical.
157    if !judge_enabled {
158        let (rules_text, count, ids, titles) = if let Some((reranked, rules_text)) = reranked {
159            let count = i32::try_from(reranked.len()).unwrap_or(i32::MAX);
160            let (ids, titles) = matched_rule_ids_and_titles(&reranked);
161            (rules_text, count, ids, titles)
162        } else {
163            let count = i32::try_from(pack.metadata.rule_count).unwrap_or(i32::MAX);
164            let (ids, titles) = matched_rule_ids_and_titles(&pack.rule_context);
165            (pack.sections.rules, count, ids, titles)
166        };
167        return PreparedReviewRules {
168            rules_text,
169            count,
170            ids,
171            titles,
172        };
173    }
174
175    // Judge-ON path: resolve a concrete working pool (the reranked items when
176    // rerank is active, else the full retrieved context), let the judge drop
177    // non-applicable rules, then derive text/ids/titles from the FINAL pool so
178    // all three stay mutually consistent — `rules_text_from_items` joins rule
179    // `content` exactly as the rerank path does, so an unchanged pool yields
180    // the same text the rerank would have.
181    let pool: Vec<_> = match reranked {
182        Some((reranked, _reranked_text)) => reranked,
183        None => pack.rule_context.clone(),
184    };
185
186    let pool = judge::run_applicability_judge(judge_llm, true, &input.diff_content, pool).await;
187
188    let rules_text = rules_text_from_items(&pool);
189    let count = i32::try_from(pool.len()).unwrap_or(i32::MAX);
190    let (ids, titles) = matched_rule_ids_and_titles(&pool);
191    PreparedReviewRules {
192        rules_text,
193        count,
194        ids,
195        titles,
196    }
197}
198
199pub(in super::super) fn count_blocking(issues: &[ReviewIssueRecord]) -> (u32, u32) {
200    let mut blocking = 0u32;
201    let mut non_blocking = 0u32;
202    for i in issues {
203        match i.severity.as_str() {
204            "error" | "critical" => blocking += 1,
205            _ => non_blocking += 1,
206        }
207    }
208    (blocking, non_blocking)
209}
210
211pub(in super::super) fn collect_diff_files(diff: &str) -> Vec<String> {
212    let mut out: Vec<String> = Vec::new();
213    for line in diff.lines() {
214        if let Some(rest) = line.strip_prefix("+++ ") {
215            let file = rest.strip_prefix("b/").unwrap_or(rest).trim().to_owned();
216            if file.is_empty() || file == "/dev/null" {
217                continue;
218            }
219            if !out.iter().any(|f| f == &file) {
220                out.push(file);
221            }
222        }
223    }
224    out
225}
226
/// How `run_review` talks to the LLM: remote HTTP provider when one is
/// configured, else a local agent CLI driven through `gate4agent` (Claude
/// Code, Codex, Gemini, or `OpenCode` — whichever is installed). Resolved
/// once per review by `resolve_review_engine`.
#[derive(Debug, Clone)]
pub enum ReviewEngine {
    /// Remote HTTP provider, described by its name, endpoint, credential and
    /// model.
    HttpProvider {
        provider_name: String,
        base_url: String,
        api_key: String,
        model: String,
    },
    /// Local agent CLI driven through `gate4agent`.
    AgentCli {
        /// Which agent CLI to drive.
        tool: CliTool,
        /// Empty string lets the CLI default kick in. Populated when the
        /// user explicitly configured a model in `providers setup`.
        model: String,
    },
}
246
247/// Merge issues from multiple perspective passes.
248///
249/// Dedupe key: `(file, line, rule_id_or_rule)`. When duplicates exist, the
250/// issue with the highest severity wins, and the `perspectives` vector
251/// lists every perspective (in fixed canonical order) whose pass flagged it.
252pub fn merge_perspective_issues(
253    per_perspective: Vec<(ReviewPerspective, Vec<ReviewIssueRecord>)>,
254) -> Vec<ReviewIssueRecord> {
255    use std::collections::BTreeMap;
256
257    // Preserve first-seen order while still deduping by key.
258    let mut order: Vec<String> = Vec::new();
259    let mut merged: BTreeMap<String, ReviewIssueRecord> = BTreeMap::new();
260
261    for (persp, issues) in per_perspective {
262        let persp_name = persp.name();
263        for mut issue in issues {
264            let key = format!(
265                "{}|{}|{}",
266                issue.file.as_deref().unwrap_or_default(),
267                issue.line.map(|n| n.to_string()).unwrap_or_default(),
268                issue.rule_id.as_deref().unwrap_or(issue.rule.as_str()),
269            );
270
271            if let Some(existing) = merged.get_mut(&key) {
272                if severity_rank(&issue.severity) > severity_rank(&existing.severity) {
273                    let mut perspectives = existing.perspectives.clone();
274                    if !perspectives.iter().any(|p| p == persp_name) {
275                        perspectives.push(persp_name.to_owned());
276                    }
277                    issue.perspectives = perspectives;
278                    *existing = issue;
279                } else if !existing.perspectives.iter().any(|p| p == persp_name) {
280                    existing.perspectives.push(persp_name.to_owned());
281                }
282            } else {
283                if !issue.perspectives.iter().any(|p| p == persp_name) {
284                    issue.perspectives.push(persp_name.to_owned());
285                }
286                order.push(key.clone());
287                merged.insert(key, issue);
288            }
289        }
290    }
291
292    // Reorder perspectives on each issue to a stable canonical order.
293    let canonical = [
294        ReviewPerspective::Safety.name(),
295        ReviewPerspective::Performance.name(),
296        ReviewPerspective::Style.name(),
297        ReviewPerspective::Docs.name(),
298        ReviewPerspective::ApiDesign.name(),
299    ];
300
301    order
302        .into_iter()
303        .filter_map(|k| merged.remove(&k))
304        .map(|mut issue| {
305            let mut sorted: Vec<String> = canonical
306                .iter()
307                .filter(|c| issue.perspectives.iter().any(|p| p == *c))
308                .map(ToString::to_string)
309                .collect();
310            // Append any non-canonical perspectives at the end (defensive).
311            for p in &issue.perspectives {
312                if !sorted.iter().any(|s| s == p) {
313                    sorted.push(p.clone());
314                }
315            }
316            issue.perspectives = sorted;
317            issue
318        })
319        .collect()
320}
321
322fn matched_rule_ids_and_titles(
323    rule_context: &[crate::context::types::ContextSourceItemRecord],
324) -> (Vec<String>, Vec<String>) {
325    let ids = rule_context
326        .iter()
327        .map(|item| item.source_id.clone())
328        .collect();
329    let titles = rule_context
330        .iter()
331        .map(|item| {
332            item.title
333                .clone()
334                .filter(|title| !title.trim().is_empty())
335                .unwrap_or_else(|| item.source_id.clone())
336        })
337        .collect();
338    (ids, titles)
339}
340
341fn issue_text_for_attribution(issue: &ReviewIssueRecord) -> String {
342    format!(
343        "{} {} {} {}",
344        issue.rule,
345        issue.message,
346        issue.suggestion.as_deref().unwrap_or_default(),
347        issue.file.as_deref().unwrap_or_default(),
348    )
349    .to_ascii_lowercase()
350}
351
/// Return `true` when `text` contains at least one of `needles` as a
/// substring. An empty `needles` slice yields `false`.
fn contains_any(text: &str, needles: &[&str]) -> bool {
    for needle in needles {
        if text.contains(*needle) {
            return true;
        }
    }
    false
}
355
356fn is_workflow_pin_issue(issue: &ReviewIssueRecord) -> bool {
357    let text = issue_text_for_attribution(issue);
358    let workflow_context = issue
359        .file
360        .as_deref()
361        .is_some_and(|file| file.contains(".github/workflows/"))
362        || contains_any(
363            &text,
364            &[
365                "github action",
366                "actions/",
367                "uses:",
368                "workflow",
369                "checkout@",
370            ],
371        );
372    let pin_context = contains_any(
373        &text,
374        &[
375            "pin",
376            "sha",
377            "immutable",
378            "mutable",
379            "floating",
380            "@main",
381            "@master",
382        ],
383    );
384    workflow_context && pin_context
385}
386
387fn is_workflow_pin_rule_title(title: &str) -> bool {
388    let text = title.to_ascii_lowercase();
389    contains_any(&text, &["github action", "actions", "workflow"])
390        && contains_any(&text, &["pin", "sha", "immutable"])
391}
392
/// Tokenize text for rule attribution.
///
/// The text is split on every character that is not ASCII alphanumeric.
/// Pieces shorter than three bytes are dropped, the rest are lowercased, two
/// plurals are normalised (`shas` -> `sha`, `references` -> `reference`), and
/// stopwords are removed. The result is a sorted, de-duplicated set.
fn attribution_tokens(text: &str) -> std::collections::BTreeSet<String> {
    const STOPWORDS: &[&str] = &[
        "the", "and", "for", "from", "into", "with", "this", "that", "must", "should", "would",
        "could", "rule", "rules", "file", "line", "review", "code", "when", "where", "than",
        "then", "they", "them", "your", "their",
    ];

    let mut tokens = std::collections::BTreeSet::new();
    for piece in text.split(|c: char| !c.is_ascii_alphanumeric()) {
        // Pieces are pure ASCII alphanumerics, so byte length is char length.
        if piece.len() < 3 {
            continue;
        }
        let lowered = piece.to_ascii_lowercase();
        let normalized = match lowered.as_str() {
            "shas" => "sha",
            "references" => "reference",
            other => other,
        };
        if !STOPWORDS.contains(&normalized) {
            tokens.insert(normalized.to_owned());
        }
    }
    tokens
}
414
415fn infer_rule_id_for_issue(
416    issue: &ReviewIssueRecord,
417    matched_rule_ids: &[String],
418    matched_rule_titles: &[String],
419) -> Option<String> {
420    if matched_rule_ids.is_empty() {
421        return None;
422    }
423
424    if is_workflow_pin_issue(issue)
425        && let Some((idx, _)) = matched_rule_titles
426            .iter()
427            .enumerate()
428            .find(|(_, title)| is_workflow_pin_rule_title(title))
429    {
430        return matched_rule_ids.get(idx).cloned();
431    }
432
433    let issue_tokens = attribution_tokens(&issue_text_for_attribution(issue));
434    if issue_tokens.is_empty() {
435        return None;
436    }
437
438    let mut best: Option<(usize, f32, usize)> = None;
439    let mut second_best = 0.0_f32;
440    for (idx, title) in matched_rule_titles.iter().enumerate() {
441        let title_tokens = attribution_tokens(title);
442        if title_tokens.is_empty() {
443            continue;
444        }
445        let overlap = title_tokens
446            .iter()
447            .filter(|token| issue_tokens.contains(*token))
448            .count();
449        if overlap < 2 {
450            continue;
451        }
452        let score = overlap as f32 / title_tokens.len() as f32;
453        match best {
454            Some((_, best_score, _)) if score > best_score => {
455                second_best = best_score;
456                best = Some((idx, score, overlap));
457            }
458            Some(_) => {
459                second_best = second_best.max(score);
460            }
461            None => best = Some((idx, score, overlap)),
462        }
463    }
464
465    let (idx, score, overlap) = best?;
466    if overlap >= 2 && score >= 0.60 && score >= second_best + 0.15 {
467        matched_rule_ids.get(idx).cloned()
468    } else {
469        None
470    }
471}
472
473fn apply_missing_rule_attributions(
474    issues: &mut [ReviewIssueRecord],
475    matched_rule_ids: &[String],
476    matched_rule_titles: &[String],
477) {
478    for issue in issues {
479        if issue
480            .rule_id
481            .as_deref()
482            .is_some_and(|rule_id| !rule_id.trim().is_empty())
483        {
484            continue;
485        }
486        if let Some(rule_id) = infer_rule_id_for_issue(issue, matched_rule_ids, matched_rule_titles)
487        {
488            issue.rule_id = Some(rule_id);
489        }
490    }
491}
492
493/// Apply hunk-aware line resolution to a batch of issues.
494///
495/// For each issue, parses the hunks of the file it touches out of the raw
496/// unified `diff`, then snaps `issue.line` to the exact new-file line via
497/// [`resolver::resolve_issue_lines`] using the model-supplied `snippets`
498/// (parallel to `issues`) and the issue's claimed line. Issues whose file
499/// can't be found in the diff, or that don't confidently match, are left
500/// untouched — this only ever sharpens a line number, never regresses it.
501///
502/// `snippets[i]` is the optional verbatim source for `issues[i]` (from
503/// `parse::extract_issue_snippets`); a shorter/empty slice is tolerated.
504fn apply_hunk_line_resolution(
505    issues: &mut [ReviewIssueRecord],
506    snippets: &[Option<String>],
507    diff: &str,
508) {
509    use std::collections::HashMap;
510
511    // Parse hunks once per file path, lazily, and cache.
512    let sections = split_diff_by_file(diff);
513    let mut cache: HashMap<String, Vec<resolver::DiffHunk>> = HashMap::new();
514
515    for (idx, issue) in issues.iter_mut().enumerate() {
516        let Some(file) = issue.file.as_deref() else {
517            continue;
518        };
519        let hunks = cache.entry(file.to_owned()).or_insert_with(|| {
520            sections
521                .get(file)
522                .map(|section| resolver::parse_hunks(section))
523                .unwrap_or_default()
524        });
525        if hunks.is_empty() {
526            continue;
527        }
528        let target = resolver::ResolveTarget {
529            snippet: snippets.get(idx).and_then(Clone::clone),
530            claimed_line: issue.line,
531        };
532        if let Some((start, _end)) = resolver::resolve_issue_lines(&target, hunks) {
533            issue.line = Some(start);
534        }
535    }
536}
537
/// Split a multi-file unified diff into `path -> section` so each file's
/// hunks can be parsed independently.
///
/// A section starts at the file's `+++ ` header line and runs up to the next
/// file (file headers are tolerated by the hunk parser). Sections are keyed
/// by the new-side (`+++ b/…`) path so they match `issue.file`. Files whose
/// new side is `/dev/null` (deletions), and sections with no content, are
/// omitted.
///
/// A new file begins at a `diff --git ` line, or at a `+++ ` header when the
/// diff has no `diff --git` separators (plain `diff -u` output). Inside a
/// hunk, a `+++ ` line counts as a header only when it directly follows a
/// `--- ` line; otherwise it is an added content line that happens to start
/// with `++ ` and stays in the current section. That rule also lets the
/// `--- ` header of the next file be removed from the tail of the previous
/// section.
///
/// Known limitation: a removed line starting with `-- ` directly followed by
/// an added line starting with `++ ` inside a hunk is still read as a header
/// pair.
fn split_diff_by_file(diff: &str) -> std::collections::HashMap<String, String> {
    use std::collections::HashMap;

    // Store a finished section under its path; whitespace-only bodies are
    // dropped. The body buffer is always left empty for the next file.
    fn flush(path: Option<String>, body: &mut String, out: &mut HashMap<String, String>) {
        let section = std::mem::take(body);
        if let Some(path) = path {
            if !section.trim().is_empty() {
                out.insert(path, section);
            }
        }
    }

    let mut out = HashMap::new();
    let mut current_path: Option<String> = None;
    let mut current_body = String::new();
    // True once a `@@` hunk header has been seen for the current file.
    let mut in_hunk = false;
    // Whether the previous line was a `--- ` line, and where the most recently
    // appended line starts inside `current_body`.
    let mut prev_was_old_header = false;
    let mut prev_line_start = 0usize;

    for line in diff.lines() {
        let follows_old_header =
            std::mem::replace(&mut prev_was_old_header, line.starts_with("--- "));

        if line.starts_with("diff --git ") {
            flush(current_path.take(), &mut current_body, &mut out);
            in_hunk = false;
        } else if line.starts_with("@@") {
            in_hunk = true;
        } else if let Some(rest) = line.strip_prefix("+++ ") {
            if follows_old_header || !in_hunk {
                if current_path.is_some() {
                    if follows_old_header {
                        // The preceding `--- ` line belongs to this new file,
                        // not to the section being closed.
                        current_body.truncate(prev_line_start);
                    }
                    flush(current_path.take(), &mut current_body, &mut out);
                }
                let path = rest.strip_prefix("b/").unwrap_or(rest).trim();
                if !path.is_empty() && path != "/dev/null" {
                    current_path = Some(path.to_owned());
                }
                in_hunk = false;
            }
        }

        if current_path.is_some() {
            prev_line_start = current_body.len();
            current_body.push_str(line);
            current_body.push('\n');
        }
    }
    flush(current_path.take(), &mut current_body, &mut out);
    out
}
578
/// Multi-perspective review.
///
/// Convenience entry point: delegates to
/// [`run_review_multi_with_trajectory`] with no trajectory builder, so no
/// trajectory steps are recorded.
pub async fn run_review_multi(
    db: &sqlx::SqlitePool,
    input: ReviewCheckInput,
) -> crate::Result<ReviewCheckResult> {
    run_review_multi_with_trajectory(db, input, None).await
}
586
587/// Trajectory-aware variant of `run_review_multi`.
588pub async fn run_review_multi_with_trajectory(
589    db: &sqlx::SqlitePool,
590    input: ReviewCheckInput,
591    mut trajectory: Option<&mut TrajectoryBuilder>,
592) -> crate::Result<ReviewCheckResult> {
593    let trace_id = uuid::Uuid::new_v4().to_string();
594
595    // 1. Get active provider (once — shared by all perspectives)
596    let (provider_name, base_url, api_key, model) = get_active_provider(db).await?;
597
598    let retrieval_intent = crate::context::intent_filter::build_review_intent_text(
599        input.file_path.as_deref(),
600        &input.diff_content,
601    );
602    let retrieval_query = if retrieval_intent.trim().is_empty() {
603        input.diff_content.as_str()
604    } else {
605        retrieval_intent.as_str()
606    };
607    let repo_scopes = repo_scopes_for_input(&input);
608
609    // Settings drive the (opt-in) applicability judge below and the
610    // past-verdict recall / self-check / summary gating further down.
611    let settings_for_recall = crate::settings::get().await.unwrap_or_default();
612
613    // 2. Get matched rules via context engine (once — shared by all
614    // perspectives). The applicability judge, when enabled, reuses the active
615    // provider through a dedicated `HttpReviewLlm` built from the same tuple.
616    let judge_llm = HttpReviewLlm {
617        provider_name: provider_name.clone(),
618        base_url: base_url.clone(),
619        api_key: api_key.clone(),
620        model: model.clone(),
621    };
622    let prepared = prepare_review_rules(
623        db,
624        &input,
625        retrieval_query,
626        &repo_scopes,
627        &judge_llm,
628        &settings_for_recall.review_engine,
629        "review_check_multi",
630    )
631    .await;
632    let PreparedReviewRules {
633        rules_text,
634        count: matched_rules,
635        ids: matched_rule_ids,
636        titles: matched_rule_titles,
637    } = prepared;
638
639    if let Some(tb) = trajectory.as_deref_mut() {
640        tb.push(TrajectoryStep::ChunksRetrieved {
641            count: matched_rules.try_into().unwrap_or(usize::MAX),
642            symbols: matched_rule_titles.clone(),
643            similarity_scores: Vec::new(),
644        });
645        tb.push(TrajectoryStep::RulesApplied {
646            rule_ids: matched_rule_ids.clone(),
647            source: RuleSource::Team,
648        });
649    }
650
651    // 3. Shared user prompt (once — identical across perspectives)
652    let user_prompt = build_user_prompt(
653        &input.diff_content,
654        rules_text.as_deref(),
655        input.file_path.as_deref(),
656    );
657    let prompt_tokens_estimate = (i32::try_from(user_prompt.len())
658        .unwrap_or(i32::MAX)
659        .saturating_add(3))
660        / 4;
661
662    // 4. Past-verdict recall. Preview callers need a bounded first answer;
663    // they can inspect memory separately with `difflore recall --diff`.
664    let past_verdicts = if input.fast_preview {
665        Vec::new()
666    } else {
667        recall_past_verdicts_for_review(
668            &settings_for_recall,
669            &input.diff_content,
670            if input.project_id.is_empty() {
671                None
672            } else {
673                Some(&input.project_id)
674            },
675            &repo_scopes,
676        )
677        .await
678    };
679
680    if let Some(tb) = trajectory.as_deref_mut() {
681        let recalled_items = build_recalled_verdicts(&past_verdicts);
682        let top_similarities: Vec<f32> =
683            recalled_items.iter().map(|item| item.similarity).collect();
684        tb.push(TrajectoryStep::PastVerdictsRecalled {
685            count: past_verdicts.len(),
686            top_similarities,
687            recalled_items,
688        });
689    }
690
691    let (safety_issues, perf_issues, style_issues, docs_issues, api_design_issues) = tokio::join!(
692        run_one_perspective(PerspectiveRun {
693            provider_name: &provider_name,
694            base_url: &base_url,
695            api_key: &api_key,
696            model: &model,
697            user_prompt: &user_prompt,
698            perspective: ReviewPerspective::Safety,
699            diff_content: &input.diff_content,
700            past_verdicts: &past_verdicts,
701        }),
702        run_one_perspective(PerspectiveRun {
703            provider_name: &provider_name,
704            base_url: &base_url,
705            api_key: &api_key,
706            model: &model,
707            user_prompt: &user_prompt,
708            perspective: ReviewPerspective::Performance,
709            diff_content: &input.diff_content,
710            past_verdicts: &past_verdicts,
711        }),
712        run_one_perspective(PerspectiveRun {
713            provider_name: &provider_name,
714            base_url: &base_url,
715            api_key: &api_key,
716            model: &model,
717            user_prompt: &user_prompt,
718            perspective: ReviewPerspective::Style,
719            diff_content: &input.diff_content,
720            past_verdicts: &past_verdicts,
721        }),
722        run_one_perspective(PerspectiveRun {
723            provider_name: &provider_name,
724            base_url: &base_url,
725            api_key: &api_key,
726            model: &model,
727            user_prompt: &user_prompt,
728            perspective: ReviewPerspective::Docs,
729            diff_content: &input.diff_content,
730            past_verdicts: &past_verdicts,
731        }),
732        run_one_perspective(PerspectiveRun {
733            provider_name: &provider_name,
734            base_url: &base_url,
735            api_key: &api_key,
736            model: &model,
737            user_prompt: &user_prompt,
738            perspective: ReviewPerspective::ApiDesign,
739            diff_content: &input.diff_content,
740            past_verdicts: &past_verdicts,
741        }),
742    );
743
744    if let Some(tb) = trajectory.as_deref_mut() {
745        let per_call_input = u32::try_from(prompt_tokens_estimate).unwrap_or(u32::MAX);
746        for perspective in ReviewPerspective::all() {
747            tb.push(TrajectoryStep::LlmCall {
748                perspective: perspective.name().to_owned(),
749                input_tokens: per_call_input,
750                output_tokens: 0,
751                raw_output: None,
752            });
753        }
754    }
755
756    let issues = merge_perspective_issues(vec![
757        (ReviewPerspective::Safety, safety_issues),
758        (ReviewPerspective::Performance, perf_issues),
759        (ReviewPerspective::Style, style_issues),
760        (ReviewPerspective::Docs, docs_issues),
761        (ReviewPerspective::ApiDesign, api_design_issues),
762    ]);
763
764    // Provider tuple is no longer needed after the per-perspective fan-out,
765    // so move (don't clone) into the verify-pass LLM box.
766    let llm: Box<dyn ReviewLlm> = Box::new(HttpReviewLlm {
767        provider_name,
768        base_url,
769        api_key,
770        model,
771    });
772    let pre_verify_count = issues.len();
773    let issues = verify_pass_internal(
774        llm.as_ref(),
775        settings_for_recall.review_engine.self_check_enabled && !input.fast_preview,
776        &input.diff_content,
777        issues,
778    )
779    .await;
780
781    if let Some(tb) = trajectory.as_deref_mut() {
782        let keep_count = u32::try_from(issues.len()).unwrap_or(u32::MAX);
783        let drop_count =
784            u32::try_from(pre_verify_count.saturating_sub(issues.len())).unwrap_or(u32::MAX);
785        let avg_confidence = if issues.is_empty() {
786            0.0
787        } else {
788            issues.iter().map(|i| i.confidence).sum::<f32>() / (issues.len() as f32)
789        };
790        tb.push(TrajectoryStep::SelfCheck {
791            keep_count,
792            drop_count,
793            avg_confidence,
794        });
795    }
796
797    let mut issues = issues;
798    apply_missing_rule_attributions(&mut issues, &matched_rule_ids, &matched_rule_titles);
799    // Hunk-aware line snap (gated; default off). The multi-pass merge does not
800    // retain snippets, so this path snaps using the claimed line only.
801    if settings_for_recall.review_engine.hunk_line_resolution {
802        apply_hunk_line_resolution(&mut issues, &[], &input.diff_content);
803    }
804    issues.sort_by(|a, b| {
805        b.confidence
806            .partial_cmp(&a.confidence)
807            .unwrap_or(std::cmp::Ordering::Equal)
808    });
809
810    let summary = run_review_summary_internal(
811        llm.as_ref(),
812        settings_for_recall.review_engine.review_summary_enabled && !input.fast_preview,
813        &input.diff_content,
814        &issues,
815    )
816    .await;
817
818    if let Some(tb) = trajectory.as_deref_mut() {
819        let ids = issues
820            .iter()
821            .map(|i| i.rule_id.clone().unwrap_or_else(|| i.rule.clone()))
822            .collect();
823        tb.push(TrajectoryStep::FinalDecision {
824            issue_ids_emitted: ids,
825        });
826    }
827
828    let stats = ReviewStats {
829        input_tokens: u32::try_from(prompt_tokens_estimate.max(0)).unwrap_or(u32::MAX),
830        duration_ms: None,
831        perspective_count: 5,
832        past_verdicts_used: u32::try_from(past_verdicts.len()).unwrap_or(u32::MAX),
833        trajectory_step_count: trajectory
834            .as_deref()
835            .map(|tb| u32::try_from(tb.len()).unwrap_or(u32::MAX)),
836    };
837
838    Ok(ReviewCheckResult {
839        issues,
840        matched_rules,
841        matched_rule_ids,
842        matched_rule_titles,
843        prompt_tokens_estimate,
844        trace_id,
845        summary,
846        stats: Some(stats),
847    })
848}
849
/// Stable label for the code path selected by `run_review_smart`:
/// `"multi"` for the multi-perspective path, `"single"` otherwise.
pub const fn select_review_mode(multi_perspective: bool) -> &'static str {
    match multi_perspective {
        true => "multi",
        false => "single",
    }
}
854
855#[allow(clippy::items_after_test_module)]
856#[cfg(test)]
857mod tests {
858    use super::*;
859
860    fn review_input(repo: Option<&str>, aliases: Vec<&str>) -> ReviewCheckInput {
861        ReviewCheckInput {
862            project_id: "project-1".to_owned(),
863            diff_content: String::new(),
864            file_path: None,
865            engine: None,
866            review_id: None,
867            repo_full_name: repo.map(str::to_owned),
868            repo_full_name_aliases: aliases.into_iter().map(str::to_owned).collect(),
869            fast_preview: false,
870        }
871    }
872
873    #[test]
874    fn repo_scopes_include_origin_and_upstream_aliases() {
875        let input = review_input(
876            Some("difflore-fixtures/router"),
877            vec!["difflore-fixtures/router", "tanstack/router"],
878        );
879
880        assert_eq!(
881            repo_scopes_for_input(&input),
882            vec![
883                "difflore-fixtures/router".to_owned(),
884                "tanstack/router".to_owned(),
885            ],
886        );
887    }
888
889    #[test]
890    fn repo_scopes_dedupe_aliases_case_insensitively() {
891        let input = review_input(
892            Some("TanStack/router"),
893            vec!["tanstack/router", "  ", "difflore-fixtures/router"],
894        );
895
896        assert_eq!(
897            repo_scopes_for_input(&input),
898            vec![
899                "TanStack/router".to_owned(),
900                "difflore-fixtures/router".to_owned(),
901            ],
902        );
903    }
904
905    #[test]
906    fn fast_preview_input_marks_secondary_review_passes_skippable() {
907        let mut input = review_input(Some("owner/repo"), vec![]);
908        assert!(!input.fast_preview);
909
910        input.fast_preview = true;
911
912        assert!(input.fast_preview);
913    }
914
915    #[test]
916    fn workflow_pin_issue_gets_recalled_rule_id_when_model_omits_it() {
917        let issue = ReviewIssueRecord {
918            severity: "warning".to_owned(),
919            rule: "Pin GitHub Actions to immutable references".to_owned(),
920            rule_id: None,
921            message: "actions/checkout@main is a floating ref".to_owned(),
922            file: Some(".github/workflows/pr.yml".to_owned()),
923            line: Some(26),
924            suggestion: Some("Use a commit SHA instead of main.".to_owned()),
925            source_badge: None,
926            perspectives: Vec::new(),
927            confidence: 0.98,
928        };
929
930        let rule_id = infer_rule_id_for_issue(
931            &issue,
932            &[
933                "pin-actions-rule".to_owned(),
934                "version-update-rule".to_owned(),
935            ],
936            &[
937                "Pin Actions to commit SHAs".to_owned(),
938                "Update GitHub Actions versions atomically".to_owned(),
939            ],
940        );
941
942        assert_eq!(rule_id.as_deref(), Some("pin-actions-rule"));
943    }
944
945    #[test]
946    fn missing_rule_attribution_stays_empty_for_ambiguous_text() {
947        let mut issues = vec![ReviewIssueRecord {
948            severity: "warning".to_owned(),
949            rule: "Improve code".to_owned(),
950            rule_id: None,
951            message: "This should be cleaner.".to_owned(),
952            file: Some("src/lib.rs".to_owned()),
953            line: Some(1),
954            suggestion: Some("Refactor it.".to_owned()),
955            source_badge: None,
956            perspectives: Vec::new(),
957            confidence: 0.8,
958        }];
959
960        apply_missing_rule_attributions(
961            &mut issues,
962            &["pin-actions-rule".to_owned()],
963            &["Pin Actions to commit SHAs".to_owned()],
964        );
965
966        assert!(issues[0].rule_id.is_none());
967    }
968
969    const MULTI_FILE_DIFF: &str = "\
970diff --git a/src/a.rs b/src/a.rs
971index 1111111..2222222 100644
972--- a/src/a.rs
973+++ b/src/a.rs
974@@ -5,4 +5,5 @@ fn a() {
975     let x = 1;
976     let y = 2;
977+    let z = dangerous(x, y);
978     done();
979 }
980diff --git a/src/b.rs b/src/b.rs
981index 3333333..4444444 100644
982--- a/src/b.rs
983+++ b/src/b.rs
984@@ -20,3 +20,4 @@ fn b() {
985     setup();
986+    let secret = read_env();
987     teardown();
988";
989
990    fn issue_at(file: &str, line: i32) -> ReviewIssueRecord {
991        ReviewIssueRecord {
992            severity: "warning".to_owned(),
993            rule: "r".to_owned(),
994            rule_id: None,
995            message: "m".to_owned(),
996            file: Some(file.to_owned()),
997            line: Some(line),
998            suggestion: None,
999            source_badge: None,
1000            perspectives: Vec::new(),
1001            confidence: 0.9,
1002        }
1003    }
1004
1005    #[test]
1006    fn split_diff_by_file_keys_on_new_side_path() {
1007        let map = split_diff_by_file(MULTI_FILE_DIFF);
1008        assert_eq!(map.len(), 2);
1009        assert!(map.contains_key("src/a.rs"));
1010        assert!(map.contains_key("src/b.rs"));
1011        assert!(map["src/a.rs"].contains("dangerous(x, y)"));
1012        assert!(map["src/b.rs"].contains("read_env()"));
1013    }
1014
1015    #[test]
1016    fn hunk_resolution_snaps_issue_to_exact_line_via_snippet() {
1017        let mut issues = vec![issue_at("src/a.rs", 999), issue_at("src/b.rs", 1)];
1018        let snippets = vec![
1019            Some("let z = dangerous(x, y);".to_owned()),
1020            Some("let secret = read_env();".to_owned()),
1021        ];
1022        apply_hunk_line_resolution(&mut issues, &snippets, MULTI_FILE_DIFF);
1023        // a.rs new-side: 5,6 context, 7 added `z`, ... → line 7.
1024        assert_eq!(issues[0].line, Some(7));
1025        // b.rs new-side: 20 context, 21 added `secret` → line 21.
1026        assert_eq!(issues[1].line, Some(21));
1027    }
1028
1029    #[test]
1030    fn hunk_resolution_leaves_line_when_file_not_in_diff() {
1031        let mut issues = vec![issue_at("src/unknown.rs", 42)];
1032        let snippets = vec![Some("whatever".to_owned())];
1033        apply_hunk_line_resolution(&mut issues, &snippets, MULTI_FILE_DIFF);
1034        assert_eq!(issues[0].line, Some(42), "untouched when no diff section");
1035    }
1036
1037    #[test]
1038    fn hunk_resolution_snaps_via_claimed_line_without_snippet() {
1039        // No snippets at all (mirrors the multi-perspective path). The model
1040        // claimed line 6 in a.rs, which is a real new-side context line.
1041        let mut issues = vec![issue_at("src/a.rs", 6)];
1042        apply_hunk_line_resolution(&mut issues, &[], MULTI_FILE_DIFF);
1043        assert_eq!(issues[0].line, Some(6));
1044    }
1045
1046    #[test]
1047    fn hunk_resolution_tolerates_shorter_snippet_slice() {
1048        // snippets slice shorter than issues — extra issues fall back to
1049        // claimed-line snap, no panic.
1050        let mut issues = vec![issue_at("src/a.rs", 7), issue_at("src/b.rs", 21)];
1051        let snippets = vec![Some("let z = dangerous(x, y);".to_owned())];
1052        apply_hunk_line_resolution(&mut issues, &snippets, MULTI_FILE_DIFF);
1053        assert_eq!(issues[0].line, Some(7));
1054        assert_eq!(issues[1].line, Some(21));
1055    }
1056
1057    #[test]
1058    fn hunk_resolution_falls_back_when_nothing_matches() {
1059        // Backward-safety guarantee: the file IS in the diff, but neither the
1060        // snippet nor the claimed line corresponds to any hunk line. Hunk
1061        // attribution must decline (resolver → None) and leave the model's
1062        // claimed line exactly as-is, so we degrade to the prior token-overlap
1063        // behaviour instead of inventing a wrong line.
1064        let mut issues = vec![issue_at("src/a.rs", 900)];
1065        let snippets = vec![Some("text that appears nowhere in the diff".to_owned())];
1066        apply_hunk_line_resolution(&mut issues, &snippets, MULTI_FILE_DIFF);
1067        assert_eq!(
1068            issues[0].line,
1069            Some(900),
1070            "no confident hunk match → claimed line preserved (no regression)"
1071        );
1072    }
1073
1074    #[test]
1075    fn hunk_resolution_maps_multiline_finding_to_range_start() {
1076        // A finding whose snippet spans two consecutive new-side lines must be
1077        // attributed to the START of that exact range. In a.rs the added
1078        // `dangerous` line is 7 and the following context `done();` is 8, so a
1079        // two-line snippet resolves to the range 7..=8; `issue.line` carries
1080        // the range start (7). This is the "exact changed line RANGE" mapping
1081        // the hunk resolver provides over token overlap.
1082        let mut issues = vec![issue_at("src/a.rs", 1)];
1083        let snippets = vec![Some("let z = dangerous(x, y);\ndone();".to_owned())];
1084        apply_hunk_line_resolution(&mut issues, &snippets, MULTI_FILE_DIFF);
1085        assert_eq!(
1086            issues[0].line,
1087            Some(7),
1088            "multi-line finding anchors on the first line of the changed range"
1089        );
1090    }
1091
1092    // === hunk_line_resolution end-to-end coverage (real captured diff) ===
1093    //
1094    // Real diff + real LLM response captured from a live `difflore fix
1095    // --preview` run against difflore-test-e2e/hono/src/compose.ts
1096    // (Sonnet via claude-cli). Ground-truth new-file lines: 42, 43, 52.
1097    //
1098    // This drives the EXACT production gate path: split_diff_by_file →
1099    // parse_hunks → resolve_issue_lines, via apply_hunk_line_resolution.
1100    // OFF = claimed line untouched; ON = apply_hunk_line_resolution.
1101
1102    const HONO_DIFF: &str = "\
1103--- a/src/compose.ts
1104+++ b/src/compose.ts
1105@@ -39,6 +39,9 @@ export const compose = <E extends Env = Env>(
1106       let isError = false
1107       let handler
1108
1109+      const apiKey = \"sk-live-1234567890abcdef\"
1110+      console.log(\"dispatching middleware at index \" + i + \" key=\" + apiKey)
1111+
1112       if (middleware[i]) {
1113         handler = middleware[i][0][0]
1114         context.req.routeIndex = i
1115@@ -46,6 +49,10 @@ export const compose = <E extends Env = Env>(
1116         handler = (i === middleware.length && next) || undefined
1117       }
1118
1119+      if (handler == null) {
1120+        handler = middleware[i][0][0]
1121+      }
1122+
1123       if (handler) {
1124         try {
1125           res = await handler(context, () => dispatch(i + 1))
1126";
1127
1128    // (real_snippet, ground_truth_new_file_line) for each issue the model
1129    // actually returned.
1130    fn hono_cases() -> Vec<(String, i32)> {
1131        vec![
1132            (
1133                "      const apiKey = \"sk-live-1234567890abcdef\"".to_owned(),
1134                42,
1135            ),
1136            (
1137                "      console.log(\"dispatching middleware at index \" + i + \" key=\" + apiKey)"
1138                    .to_owned(),
1139                43,
1140            ),
1141            (
1142                "      if (handler == null) {\n        handler = middleware[i][0][0]\n      }"
1143                    .to_owned(),
1144                52,
1145            ),
1146        ]
1147    }
1148
1149    /// Build the issue set + snippet set for a scenario.
1150    /// `claimed[i]` is the line the model "claimed"; `with_snippet` chooses
1151    /// whether the real snippet is supplied (snippet path) or not (claimed-
1152    /// line-only path).
1153    fn build(claimed: &[i32], with_snippet: bool) -> (Vec<ReviewIssueRecord>, Vec<Option<String>>) {
1154        let cases = hono_cases();
1155        let issues = claimed
1156            .iter()
1157            .map(|&l| issue_at("src/compose.ts", l))
1158            .collect();
1159        let snippets = cases
1160            .iter()
1161            .map(|(s, _)| if with_snippet { Some(s.clone()) } else { None })
1162            .collect();
1163        (issues, snippets)
1164    }
1165
1166    fn ground_truth() -> Vec<i32> {
1167        hono_cases().into_iter().map(|(_, gt)| gt).collect()
1168    }
1169
1170    /// Count how many issues land exactly on ground truth.
1171    fn precise_count(issues: &[ReviewIssueRecord], gt: &[i32]) -> usize {
1172        issues
1173            .iter()
1174            .zip(gt.iter())
1175            .filter(|(iss, g)| iss.line == Some(**g))
1176            .count()
1177    }
1178
1179    #[test]
1180    fn measure_real_response_off_equals_on_no_change() {
1181        // Scenario A: the REAL claimed lines the model returned (already
1182        // correct). OFF should equal ON — feature is a no-op here.
1183        let gt = ground_truth();
1184        let claimed_real = gt.clone(); // model claimed 42,43,52 == GT
1185        let (off_issues, snippets) = build(&claimed_real, true);
1186        let off_precise = precise_count(&off_issues, &gt);
1187
1188        let (mut on_issues, _) = build(&claimed_real, true);
1189        apply_hunk_line_resolution(&mut on_issues, &snippets, HONO_DIFF);
1190        let on_precise = precise_count(&on_issues, &gt);
1191
1192        let on_lines: Vec<_> = on_issues.iter().map(|i| i.line).collect();
1193        eprintln!(
1194            "[MEASURE A real-response] OFF precise={off_precise}/3 ON precise={on_precise}/3 ON_lines={on_lines:?}"
1195        );
1196        assert_eq!(off_precise, 3, "model already correct on this diff");
1197        assert_eq!(on_precise, 3, "ON keeps all correct (no regression)");
1198    }
1199
1200    #[test]
1201    fn measure_corrupted_lines_with_real_snippets() {
1202        // Scenario B: simulate the documented model failure modes the
1203        // resolver exists to fix (resolver.rs docstring: "diff-relative or
1204        // off-by-N numbers, or count from the hunk header"), with the REAL
1205        // snippets preserved.
1206        //
1207        //   issue0 (GT 42): diff-relative  -> 4  (counts within hunk body)
1208        //   issue1 (GT 43): off-by-2 high  -> 45
1209        //   issue2 (GT 52): hunk-header rel-> 49 (the @@ new_start, off by 3)
1210        let gt = ground_truth();
1211        let corrupted = vec![4, 45, 49];
1212
1213        let (off_issues, _) = build(&corrupted, true);
1214        let off_precise = precise_count(&off_issues, &gt);
1215
1216        let (mut on_issues, snippets) = build(&corrupted, true);
1217        apply_hunk_line_resolution(&mut on_issues, &snippets, HONO_DIFF);
1218        let on_precise = precise_count(&on_issues, &gt);
1219
1220        let off_lines: Vec<_> = off_issues.iter().map(|i| i.line).collect();
1221        let on_lines: Vec<_> = on_issues.iter().map(|i| i.line).collect();
1222        eprintln!(
1223            "[MEASURE B corrupted+snippet] GT={gt:?} corrupted={corrupted:?} \
1224             OFF_lines={off_lines:?} (precise {off_precise}/3) \
1225             ON_lines={on_lines:?} (precise {on_precise}/3)"
1226        );
1227        // Honest assertions: ON must recover all 3 via snippet; OFF gets 0.
1228        assert_eq!(off_precise, 0, "all corrupted lines are wrong");
1229        assert_eq!(on_precise, 3, "snippet match recovers exact line for all");
1230    }
1231
1232    #[test]
1233    fn measure_corrupted_lines_without_snippets_claimed_only() {
1234        // Scenario C: same corruption but NO snippets (mirrors the multi-
1235        // perspective merge path, which drops per-issue snippets). Tests the
1236        // weaker claimed-line snap + checks for any REGRESSION (snapping a
1237        // line further from GT than where it started).
1238        let gt = ground_truth();
1239        let corrupted = vec![4, 45, 49];
1240
1241        let (off_issues, _) = build(&corrupted, false);
1242        let off_precise = precise_count(&off_issues, &gt);
1243
1244        let (mut on_issues, _) = build(&corrupted, false);
1245        apply_hunk_line_resolution(&mut on_issues, &[], HONO_DIFF);
1246        let on_precise = precise_count(&on_issues, &gt);
1247
1248        // Regression detector: for each issue, did ON move it strictly
1249        // farther from GT than OFF was?
1250        let mut regressions = 0;
1251        for ((off, on), &g) in off_issues.iter().zip(on_issues.iter()).zip(gt.iter()) {
1252            let off_d = (off.line.unwrap_or(g) - g).abs();
1253            let on_d = (on.line.unwrap_or(g) - g).abs();
1254            if on_d > off_d {
1255                regressions += 1;
1256            }
1257        }
1258
1259        let off_lines: Vec<_> = off_issues.iter().map(|i| i.line).collect();
1260        let on_lines: Vec<_> = on_issues.iter().map(|i| i.line).collect();
1261        eprintln!(
1262            "[MEASURE C corrupted no-snippet] GT={gt:?} corrupted={corrupted:?} \
1263             OFF_lines={off_lines:?} (precise {off_precise}/3) \
1264             ON_lines={on_lines:?} (precise {on_precise}/3) regressions={regressions}"
1265        );
1266        // No assertion on precise count here (claimed-only is weaker); the
1267        // eprintln carries the numbers. Guard only against regressions.
1268        assert_eq!(
1269            regressions, 0,
1270            "claimed-line snap must not move AWAY from GT"
1271        );
1272    }
1273
1274    #[test]
1275    fn measure_claimed_only_boundary_offbyone() {
1276        // Scenario C': the one case the claimed-line snap CAN help —
1277        // claimed lands just OUTSIDE the hunk (within the +/-2 tolerance) on
1278        // a non-existent new-side position. GT 43; claim 48 = one past
1279        // hunk1's last new line (47) -> snaps back to 47 (closer to GT, not
1280        // exact). GT 52; claim 59 = one past hunk2 last line (58) -> 58.
1281        let gt = vec![43, 52];
1282        let corrupted = vec![48, 59];
1283        let issues_off: Vec<_> = corrupted
1284            .iter()
1285            .map(|&l| issue_at("src/compose.ts", l))
1286            .collect();
1287        let mut issues_on = issues_off.clone();
1288        apply_hunk_line_resolution(&mut issues_on, &[], HONO_DIFF);
1289        let on_lines: Vec<_> = issues_on.iter().map(|i| i.line).collect();
1290        // Did ON move each line CLOSER to GT than OFF?
1291        let mut improved = 0;
1292        let mut regressed = 0;
1293        for ((off, on), &g) in issues_off.iter().zip(issues_on.iter()).zip(gt.iter()) {
1294            let off_d = (off.line.unwrap_or(g) - g).abs();
1295            let on_d = (on.line.unwrap_or(g) - g).abs();
1296            if on_d < off_d {
1297                improved += 1;
1298            }
1299            if on_d > off_d {
1300                regressed += 1;
1301            }
1302        }
1303        eprintln!(
1304            "[MEASURE C' claimed-only boundary] GT={gt:?} corrupted={corrupted:?} \
1305             ON_lines={on_lines:?} improved(closer)={improved} regressed={regressed}"
1306        );
1307        // Snap clamps an out-of-range claim back to the nearest real line:
1308        // strictly closer, never farther. (Closer, not exact — snippetless.)
1309        assert_eq!(regressed, 0);
1310    }
1311
1312    #[test]
1313    fn ambiguous_duplicate_snippet_prefers_claimed_occurrence() {
1314        // The snippet `handler = middleware[i][0][0]` occurs on the new side at
1315        // BOTH line 46 (pre-existing context) and line 53 (the newly-added
1316        // dup). For an issue whose claimed line is 53, the claimed-line
1317        // tie-break must keep the nearer occurrence (53) rather than snapping
1318        // to the first match (46) — the end-to-end check of the resolver guard
1319        // (without it, ON would move a correctly-claimed line onto line 46).
1320        let snippet = "      handler = middleware[i][0][0]".to_owned();
1321        let mut issues = vec![issue_at("src/compose.ts", 53)];
1322        let snippets = vec![Some(snippet)];
1323        apply_hunk_line_resolution(&mut issues, &snippets, HONO_DIFF);
1324        assert_eq!(
1325            issues[0].line,
1326            Some(53),
1327            "must keep the claimed duplicate (53), not snap to the far one (46)"
1328        );
1329    }
1330}
1331
1332pub async fn run_review_smart(
1333    db: &sqlx::SqlitePool,
1334    input: ReviewCheckInput,
1335) -> crate::Result<ReviewCheckResult> {
1336    let settings = crate::settings::get().await.unwrap_or_default();
1337    let review_id = input.review_id.clone();
1338    let multi_perspective = settings.review_engine.multi_perspective;
1339
1340    if review_id.is_none() {
1341        let started = std::time::Instant::now();
1342        let mut result = match select_review_mode(multi_perspective) {
1343            "multi" => run_review_multi(db, input).await?,
1344            _ => run_review(db, input).await?,
1345        };
1346        let duration_ms = u64::try_from(started.elapsed().as_millis()).unwrap_or(u64::MAX);
1347        if let Some(stats) = result.stats.as_mut() {
1348            stats.duration_ms = Some(duration_ms);
1349        }
1350        return Ok(result);
1351    }
1352
1353    let started = std::time::Instant::now();
1354    let mut trajectory = TrajectoryBuilder::new();
1355    let mut result = match select_review_mode(multi_perspective) {
1356        "multi" => run_review_multi_with_trajectory(db, input, Some(&mut trajectory)).await?,
1357        _ => run_review_with_trajectory(db, input, Some(&mut trajectory)).await?,
1358    };
1359    let duration_ms = u64::try_from(started.elapsed().as_millis()).unwrap_or(u64::MAX);
1360    if let Some(stats) = result.stats.as_mut() {
1361        stats.duration_ms = Some(duration_ms);
1362    }
1363
1364    if let Some(id) = review_id {
1365        upload_review_telemetry(id, duration_ms, multi_perspective, &result, trajectory).await;
1366    }
1367
1368    Ok(result)
1369}
1370
1371/// Fire-and-forget telemetry writeback.
1372async fn upload_review_telemetry(
1373    review_id: String,
1374    duration_ms: u64,
1375    multi_perspective: bool,
1376    result: &ReviewCheckResult,
1377    trajectory: TrajectoryBuilder,
1378) {
1379    let cloud = crate::cloud::client::CloudClient::create().await;
1380    if !cloud.is_logged_in() {
1381        return;
1382    }
1383
1384    let past_verdicts_used = trajectory.steps().iter().find_map(|step| match step {
1385        TrajectoryStep::PastVerdictsRecalled { count, .. } => {
1386            Some(u32::try_from(*count).unwrap_or(u32::MAX))
1387        }
1388        _ => None,
1389    });
1390
1391    let metrics_req = crate::cloud::api_types::RecordReviewMetricsRequest {
1392        input_tokens: Some(u32::try_from(result.prompt_tokens_estimate.max(0)).unwrap_or(u32::MAX)),
1393        output_tokens: None,
1394        estimated_cost_usd: None,
1395        duration_ms: Some(duration_ms),
1396        perspective_count: Some(if multi_perspective { 5 } else { 1 }),
1397        past_verdicts_used,
1398    };
1399
1400    let pool = crate::db::init_db().await.ok();
1401    if let Some(pool) = pool {
1402        let q = crate::cloud::outbox::OutboxQueue::new(pool);
1403        let metrics_payload = serde_json::json!({
1404            "review_id": review_id,
1405            "req": metrics_req,
1406        });
1407        if let Ok(s) = serde_json::to_string(&metrics_payload) {
1408            let _ = q
1409                .enqueue(crate::cloud::outbox::kind::REVIEW_METRICS, &s)
1410                .await;
1411        }
1412        if !trajectory.is_empty() {
1413            let trajectory_payload = serde_json::json!({
1414                "pr_review_id": review_id,
1415                "steps": trajectory.into_json(),
1416            });
1417            if let Ok(s) = serde_json::to_string(&trajectory_payload) {
1418                let _ = q.enqueue(crate::cloud::outbox::kind::TRAJECTORY, &s).await;
1419            }
1420        }
1421        let _ = crate::cloud::outbox::drain_outbox(&q, &cloud, 8).await;
1422    } else {
1423        let _ = cloud.record_review_metrics(&review_id, metrics_req).await;
1424        if !trajectory.is_empty() {
1425            let _ = cloud
1426                .save_trajectory(&review_id, trajectory.into_json())
1427                .await;
1428        }
1429    }
1430}
1431
1432pub async fn run_review(
1433    db: &sqlx::SqlitePool,
1434    input: ReviewCheckInput,
1435) -> crate::Result<ReviewCheckResult> {
1436    run_review_with_trajectory(db, input, None).await
1437}
1438
1439/// Trajectory-aware variant of `run_review`.
1440pub async fn run_review_with_trajectory(
1441    db: &sqlx::SqlitePool,
1442    input: ReviewCheckInput,
1443    mut trajectory: Option<&mut TrajectoryBuilder>,
1444) -> crate::Result<ReviewCheckResult> {
1445    let trace_id = uuid::Uuid::new_v4().to_string();
1446
1447    let engine = resolve_review_engine(db).await?;
1448
1449    let retrieval_intent = crate::context::intent_filter::build_review_intent_text(
1450        input.file_path.as_deref(),
1451        &input.diff_content,
1452    );
1453    let retrieval_query = if retrieval_intent.trim().is_empty() {
1454        input.diff_content.as_str()
1455    } else {
1456        retrieval_intent.as_str()
1457    };
1458    let repo_scopes = repo_scopes_for_input(&input);
1459
1460    // Loaded before rule prep so the (opt-in) applicability judge can gate on
1461    // it; reused for the recall / self-check / summary steps below.
1462    let settings = crate::settings::get().await.unwrap_or_default();
1463
1464    // The applicability judge, when enabled, reuses the resolved review engine
1465    // through its own `ReviewLlm` (the engine is consumed later by the main
1466    // review call, so clone it here for the judge's separate round-trip).
1467    let judge_llm = make_review_llm(engine.clone());
1468    let prepared = prepare_review_rules(
1469        db,
1470        &input,
1471        retrieval_query,
1472        &repo_scopes,
1473        judge_llm.as_ref(),
1474        &settings.review_engine,
1475        "review_check",
1476    )
1477    .await;
1478    let PreparedReviewRules {
1479        rules_text,
1480        count: matched_rules,
1481        ids: matched_rule_ids,
1482        titles: matched_rule_titles,
1483    } = prepared;
1484
1485    if let Some(tb) = trajectory.as_deref_mut() {
1486        tb.push(TrajectoryStep::ChunksRetrieved {
1487            count: matched_rules.try_into().unwrap_or(usize::MAX),
1488            symbols: matched_rule_titles.clone(),
1489            similarity_scores: Vec::new(),
1490        });
1491        tb.push(TrajectoryStep::RulesApplied {
1492            rule_ids: matched_rule_ids.clone(),
1493            source: RuleSource::Team,
1494        });
1495    }
1496
1497    let past_verdicts = if input.fast_preview {
1498        Vec::new()
1499    } else {
1500        recall_past_verdicts_for_review(
1501            &settings,
1502            &input.diff_content,
1503            if input.project_id.is_empty() {
1504                None
1505            } else {
1506                Some(&input.project_id)
1507            },
1508            &repo_scopes,
1509        )
1510        .await
1511    };
1512
1513    if let Some(tb) = trajectory.as_deref_mut() {
1514        let recalled_items = build_recalled_verdicts(&past_verdicts);
1515        let top_similarities: Vec<f32> =
1516            recalled_items.iter().map(|item| item.similarity).collect();
1517        tb.push(TrajectoryStep::PastVerdictsRecalled {
1518            count: past_verdicts.len(),
1519            top_similarities,
1520            recalled_items,
1521        });
1522    }
1523
1524    let seg = build_segmented_prompt(
1525        None,
1526        &[],
1527        &input.diff_content,
1528        "",
1529        None,
1530        if past_verdicts.is_empty() {
1531            None
1532        } else {
1533            Some(&past_verdicts)
1534        },
1535    );
1536    let user_prompt = build_user_prompt(
1537        &input.diff_content,
1538        rules_text.as_deref(),
1539        input.file_path.as_deref(),
1540    );
1541
1542    let prompt_tokens_estimate = (i32::try_from(user_prompt.len())
1543        .unwrap_or(i32::MAX)
1544        .saturating_add(3))
1545        / 4;
1546
1547    if let Some(path) = crate::env::fix_dump_dir() {
1548        let _ = std::fs::create_dir_all(&path);
1549        let _ = std::fs::write(format!("{path}/last_user.txt"), &user_prompt);
1550        let _ = std::fs::write(
1551            format!("{path}/last_system.txt"),
1552            format!("{}{}", seg.stable_prefix, seg.dynamic_suffix),
1553        );
1554    }
1555
1556    let ai_response = call_review_engine(&engine, &seg, &user_prompt).await?;
1557    if let Some(path) = crate::env::fix_dump_dir() {
1558        let _ = std::fs::write(format!("{path}/last_response.txt"), &ai_response);
1559    }
1560
1561    if let Some(tb) = trajectory.as_deref_mut() {
1562        tb.push(TrajectoryStep::LlmCall {
1563            perspective: "single".to_owned(),
1564            input_tokens: u32::try_from(prompt_tokens_estimate.max(0)).unwrap_or(u32::MAX),
1565            output_tokens: 0,
1566            raw_output: None,
1567        });
1568    }
1569
1570    let mut issues = parse_issues(&ai_response);
1571    // Snap each issue to its exact diff line before verification so the verify
1572    // pass and `difflore fix` see the precise location. Gated by
1573    // `review_engine.hunk_line_resolution` (default off -> no-op).
1574    if settings.review_engine.hunk_line_resolution {
1575        let snippets = super::parse::extract_issue_snippets(&ai_response);
1576        apply_hunk_line_resolution(&mut issues, &snippets, &input.diff_content);
1577    }
1578    let issues = issues;
1579    if crate::env::fix_debug() {
1580        eprintln!(
1581            "[fix-debug] single-pass raw_response_len={} parsed_issues={}",
1582            ai_response.len(),
1583            issues.len(),
1584        );
1585        if issues.is_empty() && ai_response.len() < 4000 {
1586            eprintln!("[fix-debug] response body: {ai_response}");
1587        }
1588    }
1589
1590    let llm: Box<dyn ReviewLlm> = make_review_llm(engine);
1591    let pre_verify_count = issues.len();
1592    let issues = verify_pass_internal(
1593        llm.as_ref(),
1594        settings.review_engine.self_check_enabled && !input.fast_preview,
1595        &input.diff_content,
1596        issues,
1597    )
1598    .await;
1599    if crate::env::fix_debug() {
1600        eprintln!(
1601            "[fix-debug] verify: pre={} post={} self_check_enabled={}",
1602            pre_verify_count,
1603            issues.len(),
1604            settings.review_engine.self_check_enabled && !input.fast_preview,
1605        );
1606    }
1607
1608    if let Some(tb) = trajectory.as_deref_mut() {
1609        let keep_count = u32::try_from(issues.len()).unwrap_or(u32::MAX);
1610        let drop_count =
1611            u32::try_from(pre_verify_count.saturating_sub(issues.len())).unwrap_or(u32::MAX);
1612        let avg_confidence = if issues.is_empty() {
1613            0.0
1614        } else {
1615            issues.iter().map(|i| i.confidence).sum::<f32>() / (issues.len() as f32)
1616        };
1617        tb.push(TrajectoryStep::SelfCheck {
1618            keep_count,
1619            drop_count,
1620            avg_confidence,
1621        });
1622    }
1623
1624    let mut issues = issues;
1625    apply_missing_rule_attributions(&mut issues, &matched_rule_ids, &matched_rule_titles);
1626    issues.sort_by(|a, b| {
1627        b.confidence
1628            .partial_cmp(&a.confidence)
1629            .unwrap_or(std::cmp::Ordering::Equal)
1630    });
1631
1632    let summary = run_review_summary_internal(
1633        llm.as_ref(),
1634        settings.review_engine.review_summary_enabled && !input.fast_preview,
1635        &input.diff_content,
1636        &issues,
1637    )
1638    .await;
1639
1640    if let Some(tb) = trajectory.as_deref_mut() {
1641        let ids = issues
1642            .iter()
1643            .map(|i| i.rule_id.clone().unwrap_or_else(|| i.rule.clone()))
1644            .collect();
1645        tb.push(TrajectoryStep::FinalDecision {
1646            issue_ids_emitted: ids,
1647        });
1648    }
1649
1650    let stats = ReviewStats {
1651        input_tokens: u32::try_from(prompt_tokens_estimate.max(0)).unwrap_or(u32::MAX),
1652        duration_ms: None,
1653        perspective_count: 1,
1654        past_verdicts_used: u32::try_from(past_verdicts.len()).unwrap_or(u32::MAX),
1655        trajectory_step_count: trajectory
1656            .as_deref()
1657            .map(|tb| u32::try_from(tb.len()).unwrap_or(u32::MAX)),
1658    };
1659
1660    Ok(ReviewCheckResult {
1661        issues,
1662        matched_rules,
1663        matched_rule_ids,
1664        matched_rule_titles,
1665        prompt_tokens_estimate,
1666        trace_id,
1667        summary,
1668        stats: Some(stats),
1669    })
1670}
difflore_core/review/pipeline/mod.rs

difflore_core/review/pipeline/
mod.rs