// ralph_workflow/phases/commit/diff_truncation.rs

/// Budget returned for agent names that match none of the known markers.
const MAX_SAFE_PROMPT_SIZE: u64 = 200_000;

/// Budget returned for agent names matching the GLM / ZhipuAI / ZAI / Qwen /
/// DeepSeek markers.
const GLM_MAX_PROMPT_SIZE: u64 = 100_000;

/// Budget returned for agent names matching the Claude / CCS / Anthropic markers.
const CLAUDE_MAX_PROMPT_SIZE: u64 = 300_000;

/// Picks a prompt budget (in bytes) from the name of the commit agent.
///
/// Matching is a case-insensitive substring search. The smaller-budget markers
/// are tested first, so a name that mentions both groups (for example
/// `"claude-via-glm"`) gets the smaller budget. Names that match neither group
/// fall back to [`MAX_SAFE_PROMPT_SIZE`].
pub fn model_budget_bytes_for_agent_name(commit_agent: &str) -> u64 {
    const SMALL_BUDGET_MARKERS: [&str; 5] = ["glm", "zhipuai", "zai", "qwen", "deepseek"];
    const LARGE_BUDGET_MARKERS: [&str; 3] = ["claude", "ccs", "anthropic"];

    let lowered = commit_agent.to_lowercase();
    let mentions_any =
        |markers: &[&str]| markers.iter().any(|marker| lowered.contains(*marker));

    if mentions_any(&SMALL_BUDGET_MARKERS) {
        return GLM_MAX_PROMPT_SIZE;
    }
    if mentions_any(&LARGE_BUDGET_MARKERS) {
        return CLAUDE_MAX_PROMPT_SIZE;
    }
    MAX_SAFE_PROMPT_SIZE
}
30
31pub fn effective_model_budget_bytes(agent_names: &[String]) -> u64 {
32 agent_names
33 .iter()
34 .map(|name| model_budget_bytes_for_agent_name(name))
35 .min()
36 .unwrap_or(MAX_SAFE_PROMPT_SIZE)
37}
38
39fn truncate_diff_if_large(diff: &str, max_size: usize) -> String {
41 if diff.len() <= max_size {
42 return diff.to_string();
43 }
44
45 let mut files: Vec<DiffFile> = Vec::new();
46 let mut current_file = DiffFile::default();
47 let mut in_file = false;
48
49 for line in diff.lines() {
50 if line.starts_with("diff --git ") {
51 if in_file && !current_file.lines.is_empty() {
52 files.push(std::mem::take(&mut current_file));
53 }
54 in_file = true;
55 current_file.lines.push(line.to_string());
56
57 if let Some(path) = line.split(" b/").nth(1) {
58 current_file.path = path.to_string();
59 current_file.priority = prioritize_file_path(path);
60 }
61 } else if in_file {
62 current_file.lines.push(line.to_string());
63 }
64 }
65
66 if in_file && !current_file.lines.is_empty() {
67 files.push(current_file);
68 }
69
70 files.sort_by(|a, b| b.priority.cmp(&a.priority));
71
72 let mut result = String::new();
73 let mut current_size = 0;
74 let mut files_included = 0;
75 let total_files = files.len();
76
77 for file in &files {
78 let file_size: usize = file.lines.iter().map(|l| l.len() + 1).sum();
79
80 if current_size + file_size <= max_size {
81 for line in &file.lines {
82 result.push_str(line);
83 result.push('\n');
84 }
85 current_size += file_size;
86 files_included += 1;
87 } else if files_included == 0 {
88 let truncated_lines = truncate_lines_to_fit(&file.lines, max_size);
89 for line in truncated_lines {
90 result.push_str(&line);
91 result.push('\n');
92 }
93 files_included = 1;
94 break;
95 } else {
96 break;
97 }
98 }
99
100 if files_included < total_files {
101 let summary = format!(
102 "\n[Truncated: {} of {} files shown]\n",
103 files_included, total_files
104 );
105 if summary.len() <= max_size {
106 if result.len() + summary.len() <= max_size {
107 result.push_str(&summary);
108 } else {
109 let target_bytes = max_size.saturating_sub(summary.len());
110 if target_bytes < result.len() {
111 let mut cut = 0usize;
112 for (idx, _) in result.char_indices() {
113 if idx > target_bytes {
114 break;
115 }
116 cut = idx;
117 }
118 result.truncate(cut);
119 }
120 result.push_str(&summary);
121 }
122 }
123 }
124
125 result
126}
127
128pub fn truncate_diff_to_model_budget(diff: &str, max_size_bytes: u64) -> (String, bool) {
129 let max_size = usize::try_from(max_size_bytes).unwrap_or(usize::MAX);
130 if diff.len() <= max_size {
131 (diff.to_string(), false)
132 } else {
133 (truncate_diff_if_large(diff, max_size), true)
134 }
135}
136
/// One per-file section of a unified diff, as collected during truncation.
#[derive(Debug, Default)]
struct DiffFile {
    /// Path taken from the section's `diff --git` header (text after the first
    /// `" b/"`); empty when the header has no such part.
    /// NOTE(review): this field is written but not read in the visible code.
    path: String,
    /// Ordering weight; sections with larger values are emitted first.
    priority: i32,
    /// Lines of the section, header first, without trailing newlines.
    lines: Vec<String>,
}
143
/// Scores a path so that more important files survive truncation.
///
/// Both `/` and `\` are treated as separators. Scores, first match wins:
///
/// * `100` - some path component is exactly `src`
/// * `50`  - some path component is exactly `tests`
/// * `10`  - the path ends with `.md` or `.txt`
/// * `0`   - anything else
fn prioritize_file_path(path: &str) -> i32 {
    let has_component = |wanted: &str| {
        path.split(|c: char| c == '/' || c == '\\')
            .any(|component| component == wanted)
    };
    let is_text_doc = [".md", ".txt"].iter().any(|ext| path.ends_with(*ext));

    if has_component("src") {
        return 100;
    }
    if has_component("tests") {
        return 50;
    }
    if is_text_doc {
        10
    } else {
        0
    }
}
158
/// Keeps a prefix of `lines` whose rendered size stays within `max_size` bytes
/// and marks the last kept line with `" [truncated...]"`.
///
/// Each line is counted as its byte length plus one (for the newline the
/// caller appends). Lines are taken in order until the next one would not fit.
/// Room for the marker is then made by shortening the last kept line on a
/// UTF-8 character boundary, or by dropping it when shortening would leave
/// nothing of it.
///
/// When even the first line is too long on its own, a shortened copy of it is
/// kept (instead of returning nothing), provided `max_size` leaves room for at
/// least one byte of content plus the marker and the newline.
///
/// The marker is appended whenever at least one line is kept, even if no line
/// had to be dropped. The result is empty when `lines` is empty or `max_size`
/// is too small to hold any content together with the marker.
fn truncate_lines_to_fit(lines: &[String], max_size: usize) -> Vec<String> {
    const SUFFIX: &str = " [truncated...]";

    /// Largest character boundary of `s` that is `<= max_bytes`.
    fn utf8_floor(s: &str, max_bytes: usize) -> usize {
        if max_bytes >= s.len() {
            return s.len();
        }
        let mut cut = max_bytes;
        // Index 0 is always a boundary, so this terminates.
        while !s.is_char_boundary(cut) {
            cut -= 1;
        }
        cut
    }

    // Greedily take whole lines while they fit.
    let mut kept: Vec<String> = Vec::new();
    let mut used = 0usize;
    for line in lines {
        let needed = line.len() + 1;
        if used + needed > max_size {
            break;
        }
        used += needed;
        kept.push(line.clone());
    }

    if kept.is_empty() {
        // The first line alone is over budget. Keep its head rather than
        // silently returning nothing, as long as at least one byte of content
        // fits next to the marker and the trailing newline.
        if let Some(first) = lines.first() {
            if max_size > SUFFIX.len() + 1 {
                let room = max_size - SUFFIX.len() - 1;
                let cut = utf8_floor(first, room);
                let mut head = String::with_capacity(cut + SUFFIX.len());
                head.push_str(&first[..cut]);
                head.push_str(SUFFIX);
                kept.push(head);
            }
        }
        return kept;
    }

    // Reserve space for the marker at the end of the kept lines.
    let mut total = used;
    while let Some(last) = kept.last_mut() {
        if total + SUFFIX.len() <= max_size {
            break;
        }
        let excess = total + SUFFIX.len() - max_size;
        if excess < last.len() {
            // Shortening the last line is enough.
            let cut = utf8_floor(last, last.len() - excess);
            last.truncate(cut);
            break;
        }
        // The last line would vanish entirely: drop it and re-check.
        let dropped_bytes = last.len() + 1;
        kept.pop();
        total = total.saturating_sub(dropped_bytes);
    }

    if let Some(last) = kept.last_mut() {
        last.push_str(SUFFIX);
    }

    kept
}
216
#[cfg(test)]
mod diff_truncation_tests {
    use super::*;

    /// `src` / `tests` are recognized as path components even below a crate directory.
    #[test]
    fn prioritize_file_path_handles_crate_prefixed_paths() {
        assert_eq!(prioritize_file_path("ralph-workflow/src/lib.rs"), 100);
        assert_eq!(prioritize_file_path("ralph-workflow/tests/integration.rs"), 50);
        assert_eq!(prioritize_file_path("README.md"), 10);
    }

    /// The first file fits on its own, but file + summary overshoots by one byte.
    #[test]
    fn truncate_diff_to_model_budget_never_exceeds_max_size() {
        let files_included = 1;
        let total_files = 2;
        let summary = format!(
            "\n[Truncated: {} of {} files shown]\n",
            files_included, total_files
        );

        let max_size = 1_000usize;

        let file1_header = "diff --git a/src/a.rs b/src/a.rs";
        // Size file1 so that `file1.len() + summary.len() == max_size + 1`.
        let desired_file1_size = max_size - summary.len() + 1;
        let filler_line_len = desired_file1_size.saturating_sub(file1_header.len() + 2);
        let file1 = format!(
            "{file1_header}\n+{}\n",
            "x".repeat(filler_line_len.saturating_sub(1))
        );

        let file2 = "diff --git a/tests/b.rs b/tests/b.rs\n+small\n";
        let diff = format!("{file1}{file2}");

        let (truncated, was_truncated) = truncate_diff_to_model_budget(&diff, max_size as u64);
        assert!(was_truncated, "expected truncation when diff exceeds max size");
        assert!(
            truncated.len() <= max_size,
            "truncated diff must not exceed max_size (got {} > {})",
            truncated.len(),
            max_size
        );
    }

    /// A line that fits exactly must still be shortened so the marker fits too.
    #[test]
    fn truncate_lines_to_fit_reserves_space_for_truncation_suffix() {
        let max_size = 20usize;
        let lines = vec!["x".repeat(max_size - 1)];

        let truncated = truncate_lines_to_fit(&lines, max_size);

        let total_size: usize = truncated.iter().map(|l| l.len() + 1).sum();
        assert!(
            total_size <= max_size,
            "truncate_lines_to_fit must not exceed max_size after adding suffix (got {total_size} > {max_size})"
        );
    }

    /// Shortening a multi-byte line must land on a character boundary.
    #[test]
    fn truncate_lines_to_fit_cuts_on_char_boundary() {
        // Six 4-byte emoji fit exactly (24 + 1 bytes); making room for the
        // 15-byte marker leaves 9 bytes, which rounds down to two emoji.
        let lines = vec!["🎉".repeat(6)];

        let truncated = truncate_lines_to_fit(&lines, 25);

        assert_eq!(truncated, vec!["🎉🎉 [truncated...]".to_string()]);
    }

    /// Budgets around the length of the summary line never produce oversized output.
    #[test]
    fn truncate_diff_invariant_never_exceeds_max_size_edge_cases() {
        let summary_len = "\n[Truncated: 1 of 2 files shown]\n".len();

        for max_size in [
            10,
            summary_len - 1,
            summary_len,
            summary_len + 1,
            summary_len + 10,
            100,
            1000,
        ] {
            let file1 = format!(
                "diff --git a/src/a.rs b/src/a.rs\n+{}\n",
                "x".repeat(max_size)
            );
            let file2 = "diff --git a/tests/b.rs b/tests/b.rs\n+extra\n";
            let diff = format!("{file1}{file2}");

            let (truncated, _) = truncate_diff_to_model_budget(&diff, max_size as u64);
            assert!(
                truncated.len() <= max_size,
                "truncated diff exceeded max_size {} (got {}): {:?}",
                max_size,
                truncated.len(),
                &truncated[..truncated.len().min(100)]
            );
        }
    }

    /// Exactly-at-budget input is untouched; one byte over triggers truncation.
    #[test]
    fn truncate_diff_boundary_content_sizes() {
        for max_size in [50usize, 100, 200, 500] {
            let header = "diff --git a/a b/a\n+";
            let exact_diff = format!(
                "{}{}",
                header,
                "x".repeat(max_size.saturating_sub(header.len()))
            );
            if exact_diff.len() == max_size {
                let (result, was_truncated) =
                    truncate_diff_to_model_budget(&exact_diff, max_size as u64);
                assert!(!was_truncated, "exact size should not trigger truncation");
                assert_eq!(result.len(), max_size);
            }

            let over_diff = format!(
                "{}{}",
                header,
                "x".repeat(max_size + 1 - header.len())
            );
            let (result, was_truncated) =
                truncate_diff_to_model_budget(&over_diff, max_size as u64);
            assert!(was_truncated, "over size should trigger truncation");
            assert!(
                result.len() <= max_size,
                "truncated result {} should not exceed max_size {}",
                result.len(),
                max_size
            );
        }
    }

    /// A single file far larger than the budget is cut down to fit.
    #[test]
    fn truncate_single_large_file_stays_within_budget() {
        let max_size = 100usize;

        let large_file = format!(
            "diff --git a/src/big.rs b/src/big.rs\n+{}\n",
            "x".repeat(max_size * 3)
        );

        let (truncated, was_truncated) =
            truncate_diff_to_model_budget(&large_file, max_size as u64);
        assert!(was_truncated, "large file should be truncated");
        assert!(
            truncated.len() <= max_size,
            "single large file truncation {} exceeded max_size {}",
            truncated.len(),
            max_size
        );
    }

    /// Multi-byte content must not cause a panic or oversized output.
    #[test]
    fn truncate_diff_handles_unicode_boundaries() {
        let max_size = 50usize;

        // 20 emoji at 4 bytes each: the content line alone is over budget.
        let emoji_line = "🎉".repeat(20);
        let diff = format!("diff --git a/a b/a\n+{}\n", emoji_line);

        let (truncated, was_truncated) = truncate_diff_to_model_budget(&diff, max_size as u64);
        assert!(was_truncated, "unicode diff should be truncated");
        assert!(
            truncated.len() <= max_size,
            "unicode truncation {} exceeded max_size {}",
            truncated.len(),
            max_size
        );
        // A `String` is always valid UTF-8, so this cannot fail by itself; the
        // meaningful check is that truncation above did not panic.
        assert!(
            std::str::from_utf8(truncated.as_bytes()).is_ok(),
            "truncated output should be valid UTF-8"
        );
    }

    /// Input within budget (here: empty) is passed through untouched.
    #[test]
    fn truncate_empty_diff() {
        let (result, was_truncated) = truncate_diff_to_model_budget("", 100);
        assert!(!was_truncated, "empty diff should not be truncated");
        assert_eq!(result, "");
    }

    /// Higher-priority files are kept and the lowest-priority one is dropped.
    #[test]
    fn truncate_multiple_small_files_prefers_high_priority() {
        let max_size = 200usize;

        let src_file = "diff --git a/src/main.rs b/src/main.rs\n+high priority\n";
        let test_file = "diff --git a/tests/test.rs b/tests/test.rs\n+medium priority\n";
        let doc_file = "diff --git a/README.md b/README.md\n+low priority docs\n";
        let extra = "diff --git a/extra.rs b/extra.rs\n+extra content that exceeds budget\n";

        // Deliberately listed from low to high priority.
        let diff = format!("{doc_file}{test_file}{src_file}{extra}");

        let (truncated, was_truncated) = truncate_diff_to_model_budget(&diff, max_size as u64);
        assert!(was_truncated, "should truncate when files exceed budget");
        assert!(
            truncated.len() <= max_size,
            "truncated {} exceeded max_size {}",
            truncated.len(),
            max_size
        );
        // Asserted unconditionally so the test cannot pass vacuously.
        assert!(
            truncated.contains("high priority"),
            "should prioritize src/tests over docs"
        );
        assert!(
            !truncated.contains("extra content that exceeds budget"),
            "lowest-priority file should be the one left out"
        );
    }
}