use super::retrieval::ScoredRuleChunk;
use super::rule_source::RuleExample;
use super::types::PastVerdict;
/// Default cap, in estimated tokens, for the rule sections of an assembled context.
pub const RULE_TOKEN_BUDGET: usize = 1500;

/// Token limits applied while assembling context.
#[derive(Debug, Clone, Copy)]
pub struct TokenBudgets {
    /// Maximum number of estimated tokens the rule sections may occupy.
    pub rule: usize,
}

impl Default for TokenBudgets {
    /// Uses [`RULE_TOKEN_BUDGET`] as the rule budget.
    fn default() -> Self {
        Self { rule: RULE_TOKEN_BUDGET }
    }
}

impl TokenBudgets {
    /// Builds budgets from an optional caller-supplied override.
    ///
    /// The override is honoured only when it is strictly positive and fits in
    /// `usize`; a missing, zero, or negative value falls back to
    /// [`RULE_TOKEN_BUDGET`].
    pub fn from_overrides(rule: Option<i32>) -> Self {
        let rule = match rule.map(usize::try_from) {
            Some(Ok(tokens)) if tokens > 0 => tokens,
            _ => RULE_TOKEN_BUDGET,
        };
        Self { rule }
    }
}
/// Estimates the token cost of `text` as its UTF-8 byte length divided by
/// four, rounded up.
const fn estimate_tokens(text: &str) -> usize {
    /// Number of bytes assumed to make up one token.
    const BYTES_PER_TOKEN: usize = 4;
    text.len().div_ceil(BYTES_PER_TOKEN)
}
/// One rendered rule entry of an assembled context.
#[derive(Debug, Clone)]
pub struct ContextSection {
/// Rule text, followed by its formatted examples when any were attached.
pub content: String,
}
/// Result of assembling rule chunks under a token budget.
#[derive(Debug, Clone)]
pub struct AssembledContext {
/// Sections that were included, in the order they were added.
pub rule_sections: Vec<ContextSection>,
/// Number of entries in `rule_sections`.
pub rule_count: usize,
/// Sum of the token estimates of the included sections.
pub estimated_tokens: usize,
}
fn format_rule_with_examples(rule_content: &str, examples: Option<&Vec<RuleExample>>) -> String {
let mut text = rule_content.to_owned();
if let Some(examples) = examples
&& !examples.is_empty()
{
text.push_str("\n\n### Examples\n");
for (i, ex) in examples.iter().enumerate() {
if let Some(desc) = &ex.description {
text.push_str(&format!("\n**Example {}**: {}\n", i + 1, desc));
} else {
text.push_str(&format!("\n**Example {}**:\n", i + 1));
}
text.push_str(&format!(
"\n❌ Bad:\n```\n{}\n```\n\n✅ Good:\n```\n{}\n```\n",
ex.bad_code, ex.good_code
));
}
}
text
}
pub fn assemble(
rule_chunks: &[ScoredRuleChunk],
query: &str,
task_intent: &str,
) -> AssembledContext {
assemble_with_examples_and_budgets(
rule_chunks,
query,
task_intent,
None,
TokenBudgets::default(),
)
}
/// Assembles rule context using the default [`TokenBudgets`].
///
/// `examples_map`, when present, is forwarded unchanged to
/// [`assemble_with_examples_and_budgets`], which looks examples up by each
/// chunk's `skill_id`.
// The map is the std `HashMap` with its default hasher rather than a type
// generic over `BuildHasher`, hence the lint allowance.
#[allow(clippy::implicit_hasher)]
pub fn assemble_with_examples(
    rule_chunks: &[ScoredRuleChunk],
    query: &str,
    task_intent: &str,
    examples_map: Option<&std::collections::HashMap<String, Vec<RuleExample>>>,
) -> AssembledContext {
    let default_budgets = TokenBudgets::default();
    assemble_with_examples_and_budgets(
        rule_chunks,
        query,
        task_intent,
        examples_map,
        default_budgets,
    )
}
/// Assembles rule sections from `rule_chunks` without exceeding `budgets.rule`.
///
/// Chunks are processed in slice order. Each one is rendered together with
/// the examples stored in `examples_map` under its `skill_id` (if any), its
/// token cost is estimated, and it is appended while the running total stays
/// within the budget. Assembly stops at the first chunk that would exceed the
/// budget; later chunks are not considered even if they would fit.
///
/// `query` and `task_intent` are accepted but not used by this function.
// The map is the std `HashMap` with its default hasher rather than a type
// generic over `BuildHasher`, hence the lint allowance.
#[allow(clippy::implicit_hasher)]
pub fn assemble_with_examples_and_budgets(
    rule_chunks: &[ScoredRuleChunk],
    query: &str,
    task_intent: &str,
    examples_map: Option<&std::collections::HashMap<String, Vec<RuleExample>>>,
    budgets: TokenBudgets,
) -> AssembledContext {
    // Part of the signature but not consulted here; bound to silence
    // unused-parameter warnings without renaming the parameters.
    let _ = (query, task_intent);

    let mut rule_sections = Vec::new();
    let mut rule_tokens = 0;
    for scored in rule_chunks {
        let examples = examples_map.and_then(|m| m.get(&scored.skill_id));
        let section_text = format_rule_with_examples(&scored.content, examples);
        let tokens = estimate_tokens(&section_text);
        if rule_tokens + tokens > budgets.rule {
            break;
        }
        rule_tokens += tokens;
        rule_sections.push(ContextSection {
            content: section_text,
        });
    }

    AssembledContext {
        rule_count: rule_sections.len(),
        rule_sections,
        estimated_tokens: rule_tokens,
    }
}
#[derive(Debug, Clone)]
pub struct PastVerdictSection {
pub entries: Vec<PastVerdict>,
}
impl PastVerdictSection {
pub const fn new(entries: Vec<PastVerdict>) -> Self {
Self { entries }
}
pub const fn is_empty(&self) -> bool {
self.entries.is_empty()
}
pub fn render(&self) -> String {
if self.entries.is_empty() {
return String::new();
}
let mut s = String::new();
s.push_str("## Past verdicts on similar code\n\n");
s.push_str("The following similar code pieces were previously reviewed:\n\n");
for (i, v) in self.entries.iter().enumerate() {
s.push_str(&format!(
"{}. [{}, similarity {:.2}] {}\n",
i + 1,
v.status,
v.similarity,
v.code_snippet,
));
s.push_str(&format!(" Issue: {}\n", v.issue_text));
if let Some(reason) = v.reason.as_ref()
&& !reason.is_empty()
{
s.push_str(&format!(" Reason: {reason}\n"));
}
}
s
}
}
// Unit tests for token budgeting, context assembly, and past-verdict rendering.
#[cfg(test)]
mod tests {
use super::*;
use crate::context::retrieval::ScoredRuleChunk;
use crate::context::types::PastVerdict;
// Builds a `ScoredRuleChunk` with the given id and content and fixed
// `score` / `confidence` values.
fn make_rule_chunk(skill_id: &str, content: &str) -> ScoredRuleChunk {
ScoredRuleChunk {
skill_id: skill_id.to_owned(),
content: content.to_owned(),
score: 1.0,
confidence: 0.8,
}
}
// Empty text costs zero tokens; a partial group of four bytes rounds up to one.
#[test]
fn estimate_tokens_approximates_four_chars_per_token() {
assert_eq!(estimate_tokens(""), 0);
assert_eq!(estimate_tokens("ab"), 1); assert_eq!(estimate_tokens("abcdefgh"), 2); }
// Ten 2000-byte rules (500 estimated tokens each) cannot all fit in the
// default 1500-token rule budget, so some must be dropped.
#[test]
fn assemble_respects_rule_token_budget() {
let big_rule = "r".repeat(2000);
let rules: Vec<ScoredRuleChunk> = (0..10)
.map(|i| make_rule_chunk(&format!("s{i}"), &big_rule))
.collect();
let assembled = assemble(&rules, "q", "i");
assert!(
assembled.rule_count < 10,
"expected rule budget to truncate, got {}",
assembled.rule_count
);
}
// A missing or negative override falls back to `RULE_TOKEN_BUDGET`.
#[test]
fn token_budgets_from_overrides_uses_defaults_when_invalid() {
let b = TokenBudgets::from_overrides(None);
assert_eq!(b.rule, RULE_TOKEN_BUDGET);
let b = TokenBudgets::from_overrides(Some(-5));
assert_eq!(b.rule, RULE_TOKEN_BUDGET);
}
// A positive override is used as-is.
#[test]
fn token_budgets_from_overrides_accepts_positive_values() {
let b = TokenBudgets::from_overrides(Some(50));
assert_eq!(b.rule, 50);
}
// With a 100-token budget a single 2000-byte rule (500 estimated tokens)
// already exceeds the limit, so at most one section may be kept.
#[test]
fn assemble_with_smaller_budget_truncates_more_aggressively() {
let big_rule = "r".repeat(2000);
let rules: Vec<ScoredRuleChunk> = (0..10)
.map(|i| make_rule_chunk(&format!("s{i}"), &big_rule))
.collect();
let small_budget = TokenBudgets { rule: 100 };
let assembled = assemble_with_examples_and_budgets(&rules, "q", "i", None, small_budget);
assert!(
assembled.rule_count <= 1,
"expected aggressive truncation, got {}",
assembled.rule_count,
);
}
// Builds a `PastVerdict` from the given values; the timestamp is fixed and
// the signature / source-PR fields are left as `None`.
fn sample_verdict(
id: &str,
status: &str,
snippet: &str,
issue: &str,
reason: Option<&str>,
sim: f32,
) -> PastVerdict {
PastVerdict {
extraction_id: id.into(),
code_snippet: snippet.into(),
issue_text: issue.into(),
status: status.into(),
reason: reason.map(Into::into),
similarity: sim,
created_at: "2026-04-10T00:00:00Z".into(),
signature: None,
source_pr_number: None,
source_pr_title: None,
source_pr_url: None,
}
}
// A section without entries reports itself empty and renders nothing.
#[test]
fn test_past_verdict_section_empty_renders_empty_string() {
let section = PastVerdictSection::new(Vec::new());
assert!(section.is_empty());
assert_eq!(section.render(), "");
}
// Rendering includes the heading, the intro line, and per entry the
// status, the similarity with two decimals, the snippet, the issue, and the
// reason when one is given.
#[test]
fn test_past_verdict_section_renders_entries() {
let section = PastVerdictSection::new(vec![
sample_verdict(
"e1",
"approved",
"let x = value.unwrap();",
"unwrap can panic",
Some("panics on None at runtime"),
0.874,
),
sample_verdict(
"e2",
"rejected",
"println!(\"debug\");",
"debug print left in code",
None,
0.612,
),
]);
let out = section.render();
assert!(out.contains("## Past verdicts on similar code"));
assert!(out.contains("similar code pieces were previously reviewed"));
assert!(out.contains("[approved, similarity 0.87]"));
assert!(out.contains("let x = value.unwrap();"));
assert!(out.contains("Issue: unwrap can panic"));
assert!(out.contains("Reason: panics on None at runtime"));
assert!(out.contains("[rejected, similarity 0.61]"));
assert!(out.contains("println!(\"debug\");"));
assert!(out.contains("1. "));
assert!(out.contains("2. "));
}
// Examples registered under a chunk's `skill_id` end up in that chunk's
// rendered section.
#[test]
fn assemble_includes_examples_when_provided() {
let rule = make_rule_chunk("skill1", "Always prefer `?` over unwrap()");
let mut examples_map = std::collections::HashMap::new();
examples_map.insert(
"skill1".to_owned(),
vec![RuleExample {
id: "ex1".into(),
skill_id: "skill1".into(),
description: Some("unwrap vs ?".into()),
bad_code: "value.unwrap()".into(),
good_code: "value?".into(),
source: "manual".into(),
}],
);
let assembled = assemble_with_examples(&[rule], "q", "i", Some(&examples_map));
assert_eq!(assembled.rule_count, 1);
let content = &assembled.rule_sections[0].content;
assert!(content.contains("Example 1"));
assert!(content.contains("value.unwrap()"));
assert!(content.contains("value?"));
}
}