Skip to main content

difflore_core/context/
assembler.rs

1use super::retrieval::ScoredRuleChunk;
2use super::rule_source::RuleExample;
3use super::types::PastVerdict;
4
/// Default token budget for assembled rule context.
///
/// Used by `TokenBudgets::default` and as the fallback value in
/// `TokenBudgets::from_overrides`.
pub const RULE_TOKEN_BUDGET: usize = 1500;
6
7/// Per-call token budget for assembled rule context. Defaults to the
8/// compile-time constant in `super::config`, but callers (e.g. the
9/// orchestrator) may override based on per-project settings.
10#[derive(Debug, Clone, Copy)]
11pub struct TokenBudgets {
12    pub rule: usize,
13}
14
15impl Default for TokenBudgets {
16    fn default() -> Self {
17        Self {
18            rule: RULE_TOKEN_BUDGET,
19        }
20    }
21}
22
23impl TokenBudgets {
24    /// Build from optional settings overrides. Non-positive values fall back
25    /// to the compile-time defaults.
26    pub fn from_overrides(rule: Option<i32>) -> Self {
27        let rule = rule
28            .filter(|v| *v > 0)
29            .and_then(|v| usize::try_from(v).ok())
30            .unwrap_or(RULE_TOKEN_BUDGET);
31        Self { rule }
32    }
33}
34
/// Rough token estimate for `text`: its UTF-8 byte length divided by four,
/// rounded up. Empty input yields zero.
const fn estimate_tokens(text: &str) -> usize {
    // Heuristic number of bytes assumed to make up one token.
    const BYTES_PER_TOKEN: usize = 4;

    let byte_len = text.len();
    byte_len.div_ceil(BYTES_PER_TOKEN)
}
38
/// One rendered block of text destined for the prompt.
///
/// In this file a section is built from a rule's content plus, when
/// available, its formatted examples.
#[derive(Debug, Clone)]
pub struct ContextSection {
    /// The section's full text.
    pub content: String,
}
43
/// Result of assembling rule chunks under a token budget.
#[derive(Debug, Clone)]
pub struct AssembledContext {
    /// Sections that were admitted, in the order their chunks were supplied.
    pub rule_sections: Vec<ContextSection>,
    /// Number of entries in `rule_sections`.
    pub rule_count: usize,
    /// Sum of the per-section token estimates for the admitted sections.
    pub estimated_tokens: usize,
}
50
51/// Format a rule with its few-shot examples for prompt injection.
52fn format_rule_with_examples(rule_content: &str, examples: Option<&Vec<RuleExample>>) -> String {
53    let mut text = rule_content.to_owned();
54
55    if let Some(examples) = examples
56        && !examples.is_empty()
57    {
58        text.push_str("\n\n### Examples\n");
59        for (i, ex) in examples.iter().enumerate() {
60            if let Some(desc) = &ex.description {
61                text.push_str(&format!("\n**Example {}**: {}\n", i + 1, desc));
62            } else {
63                text.push_str(&format!("\n**Example {}**:\n", i + 1));
64            }
65            text.push_str(&format!(
66                "\nāŒ Bad:\n```\n{}\n```\n\nāœ… Good:\n```\n{}\n```\n",
67                ex.bad_code, ex.good_code
68            ));
69        }
70    }
71
72    text
73}
74
75pub fn assemble(
76    rule_chunks: &[ScoredRuleChunk],
77    query: &str,
78    task_intent: &str,
79) -> AssembledContext {
80    assemble_with_examples_and_budgets(
81        rule_chunks,
82        query,
83        task_intent,
84        None,
85        TokenBudgets::default(),
86    )
87}
88
89#[allow(clippy::implicit_hasher)] // reason: stable public API; `HashMap<K,V>` (default hasher) is what every caller passes.
90pub fn assemble_with_examples(
91    rule_chunks: &[ScoredRuleChunk],
92    query: &str,
93    task_intent: &str,
94    examples_map: Option<&std::collections::HashMap<String, Vec<RuleExample>>>,
95) -> AssembledContext {
96    assemble_with_examples_and_budgets(
97        rule_chunks,
98        query,
99        task_intent,
100        examples_map,
101        TokenBudgets::default(),
102    )
103}
104
105#[allow(clippy::implicit_hasher)] // reason: stable public API; `HashMap<K,V>` (default hasher) is what every caller passes.
106pub fn assemble_with_examples_and_budgets(
107    rule_chunks: &[ScoredRuleChunk],
108    query: &str,
109    task_intent: &str,
110    examples_map: Option<&std::collections::HashMap<String, Vec<RuleExample>>>,
111    budgets: TokenBudgets,
112) -> AssembledContext {
113    let mut rule_sections = Vec::new();
114    let mut rule_tokens = 0;
115
116    for scored in rule_chunks {
117        let examples = examples_map.and_then(|m| m.get(&scored.skill_id));
118        let section_text = format_rule_with_examples(&scored.content, examples);
119        let tokens = estimate_tokens(&section_text);
120        if rule_tokens + tokens > budgets.rule {
121            break;
122        }
123        rule_tokens += tokens;
124        rule_sections.push(ContextSection {
125            content: section_text,
126        });
127    }
128
129    let _query = query;
130    let _task_intent = task_intent;
131
132    AssembledContext {
133        rule_count: rule_sections.len(),
134        rule_sections,
135        estimated_tokens: rule_tokens,
136    }
137}
138
139/// Render a past-verdict recall block for injection into the review
140/// prompt. Review memory places this at the front of the dynamic suffix so the
141/// LLM reads prior decisions before the current diff.
142#[derive(Debug, Clone)]
143pub struct PastVerdictSection {
144    pub entries: Vec<PastVerdict>,
145}
146
147impl PastVerdictSection {
148    pub const fn new(entries: Vec<PastVerdict>) -> Self {
149        Self { entries }
150    }
151
152    pub const fn is_empty(&self) -> bool {
153        self.entries.is_empty()
154    }
155
156    /// Render the section as a markdown snippet. Returns an empty string
157    /// when there are no entries, so call sites can unconditionally splice
158    /// the result into a prompt without worrying about stray headers.
159    pub fn render(&self) -> String {
160        if self.entries.is_empty() {
161            return String::new();
162        }
163        let mut s = String::new();
164        s.push_str("## Past verdicts on similar code\n\n");
165        s.push_str("The following similar code pieces were previously reviewed:\n\n");
166        for (i, v) in self.entries.iter().enumerate() {
167            s.push_str(&format!(
168                "{}. [{}, similarity {:.2}] {}\n",
169                i + 1,
170                v.status,
171                v.similarity,
172                v.code_snippet,
173            ));
174            s.push_str(&format!("   Issue: {}\n", v.issue_text));
175            if let Some(reason) = v.reason.as_ref()
176                && !reason.is_empty()
177            {
178                s.push_str(&format!("   Reason: {reason}\n"));
179            }
180        }
181        s
182    }
183}
184
// Unit tests for token budgeting, rule assembly, example formatting, and
// past-verdict rendering.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::context::retrieval::ScoredRuleChunk;
    use crate::context::types::PastVerdict;

    /// Build a rule chunk with the given id and content. `score` and
    /// `confidence` are fixed values; the assembler code in this file only
    /// reads `skill_id` and `content`.
    fn make_rule_chunk(skill_id: &str, content: &str) -> ScoredRuleChunk {
        ScoredRuleChunk {
            skill_id: skill_id.to_owned(),
            content: content.to_owned(),
            score: 1.0,
            confidence: 0.8,
        }
    }

    /// The estimate is the byte length divided by four, rounded up; empty
    /// text costs nothing.
    #[test]
    fn estimate_tokens_approximates_four_chars_per_token() {
        assert_eq!(estimate_tokens(""), 0);
        assert_eq!(estimate_tokens("ab"), 1); // (2+3)/4 = 1
        assert_eq!(estimate_tokens("abcdefgh"), 2); // (8+3)/4 = 2
    }

    /// Ten 2000-byte rules are estimated at 500 tokens each, so the default
    /// 1500-token budget cannot admit all ten.
    #[test]
    fn assemble_respects_rule_token_budget() {
        let big_rule = "r".repeat(2000);
        let rules: Vec<ScoredRuleChunk> = (0..10)
            .map(|i| make_rule_chunk(&format!("s{i}"), &big_rule))
            .collect();

        let assembled = assemble(&rules, "q", "i");
        assert!(
            assembled.rule_count < 10,
            "expected rule budget to truncate, got {}",
            assembled.rule_count
        );
    }

    /// `None` and negative overrides both fall back to `RULE_TOKEN_BUDGET`.
    #[test]
    fn token_budgets_from_overrides_uses_defaults_when_invalid() {
        let b = TokenBudgets::from_overrides(None);
        assert_eq!(b.rule, RULE_TOKEN_BUDGET);

        let b = TokenBudgets::from_overrides(Some(-5));
        assert_eq!(b.rule, RULE_TOKEN_BUDGET);
    }

    /// A positive override is used as given.
    #[test]
    fn token_budgets_from_overrides_accepts_positive_values() {
        let b = TokenBudgets::from_overrides(Some(50));
        assert_eq!(b.rule, 50);
    }

    /// With a 100-token budget, a single 500-token rule already exceeds the
    /// limit, so at most one section may survive.
    #[test]
    fn assemble_with_smaller_budget_truncates_more_aggressively() {
        let big_rule = "r".repeat(2000);
        let rules: Vec<ScoredRuleChunk> = (0..10)
            .map(|i| make_rule_chunk(&format!("s{i}"), &big_rule))
            .collect();

        let small_budget = TokenBudgets { rule: 100 };
        let assembled = assemble_with_examples_and_budgets(&rules, "q", "i", None, small_budget);
        assert!(
            assembled.rule_count <= 1,
            "expected aggressive truncation, got {}",
            assembled.rule_count,
        );
    }

    /// Build a `PastVerdict` from the fields the renderer prints, with a
    /// fixed `created_at` and no signature or source-PR metadata.
    fn sample_verdict(
        id: &str,
        status: &str,
        snippet: &str,
        issue: &str,
        reason: Option<&str>,
        sim: f32,
    ) -> PastVerdict {
        PastVerdict {
            extraction_id: id.into(),
            code_snippet: snippet.into(),
            issue_text: issue.into(),
            status: status.into(),
            reason: reason.map(Into::into),
            similarity: sim,
            created_at: "2026-04-10T00:00:00Z".into(),
            signature: None,
            source_pr_number: None,
            source_pr_title: None,
            source_pr_url: None,
        }
    }

    /// An empty section reports `is_empty()` and renders to the empty
    /// string, with no header.
    #[test]
    fn test_past_verdict_section_empty_renders_empty_string() {
        let section = PastVerdictSection::new(Vec::new());
        assert!(section.is_empty());
        assert_eq!(section.render(), "");
    }

    /// A populated section renders the header, the intro line, and numbered
    /// entries carrying status, two-decimal similarity, snippet, issue, and
    /// (for the first entry, which has one) the reason.
    #[test]
    fn test_past_verdict_section_renders_entries() {
        let section = PastVerdictSection::new(vec![
            sample_verdict(
                "e1",
                "approved",
                "let x = value.unwrap();",
                "unwrap can panic",
                Some("panics on None at runtime"),
                0.874,
            ),
            sample_verdict(
                "e2",
                "rejected",
                "println!(\"debug\");",
                "debug print left in code",
                None,
                0.612,
            ),
        ]);

        let out = section.render();
        // Header + intro
        assert!(out.contains("## Past verdicts on similar code"));
        assert!(out.contains("similar code pieces were previously reviewed"));
        // First entry -- includes status, formatted similarity, snippet, issue, reason
        assert!(out.contains("[approved, similarity 0.87]"));
        assert!(out.contains("let x = value.unwrap();"));
        assert!(out.contains("Issue: unwrap can panic"));
        assert!(out.contains("Reason: panics on None at runtime"));
        // Second entry -- rejected, no reason line
        assert!(out.contains("[rejected, similarity 0.61]"));
        assert!(out.contains("println!(\"debug\");"));
        // Numbered 1. and 2.
        assert!(out.contains("1. "));
        assert!(out.contains("2. "));
    }

    /// Examples stored under the rule's skill id are appended to that rule's
    /// section.
    #[test]
    fn assemble_includes_examples_when_provided() {
        let rule = make_rule_chunk("skill1", "Always prefer `?` over unwrap()");
        let mut examples_map = std::collections::HashMap::new();
        examples_map.insert(
            "skill1".to_owned(),
            vec![RuleExample {
                id: "ex1".into(),
                skill_id: "skill1".into(),
                description: Some("unwrap vs ?".into()),
                bad_code: "value.unwrap()".into(),
                good_code: "value?".into(),
                source: "manual".into(),
            }],
        );

        let assembled = assemble_with_examples(&[rule], "q", "i", Some(&examples_map));
        assert_eq!(assembled.rule_count, 1);
        let content = &assembled.rule_sections[0].content;
        assert!(content.contains("Example 1"));
        assert!(content.contains("value.unwrap()"));
        assert!(content.contains("value?"));
    }
}