ralph_workflow/phases/commit/
// diff_truncation.rs: prompt-size budgets (in bytes) and diff truncation helpers.

/// Byte budget returned by `model_budget_bytes_for_agent_name` for agent names
/// that match neither the reduced nor the enlarged marker group; also the
/// fallback of `effective_model_budget_bytes` when no agent names are given.
const MAX_SAFE_PROMPT_SIZE: u64 = 200_000;

/// Reduced byte budget returned for agent names containing `glm`, `zhipuai`,
/// `zai`, `qwen` or `deepseek`.
const GLM_MAX_PROMPT_SIZE: u64 = 100_000;

/// Enlarged byte budget returned for agent names containing `claude`, `ccs`
/// or `anthropic`.
const CLAUDE_MAX_PROMPT_SIZE: u64 = 300_000;
9
10#[must_use]
12pub fn model_budget_bytes_for_agent_name(commit_agent: &str) -> u64 {
13 let agent_lower = commit_agent.to_lowercase();
14
15 if agent_lower.contains("glm")
16 || agent_lower.contains("zhipuai")
17 || agent_lower.contains("zai")
18 || agent_lower.contains("qwen")
19 || agent_lower.contains("deepseek")
20 {
21 GLM_MAX_PROMPT_SIZE
22 } else if agent_lower.contains("claude")
23 || agent_lower.contains("ccs")
24 || agent_lower.contains("anthropic")
25 {
26 CLAUDE_MAX_PROMPT_SIZE
27 } else {
28 MAX_SAFE_PROMPT_SIZE
29 }
30}
31
32#[must_use]
33pub fn effective_model_budget_bytes(agent_names: &[String]) -> u64 {
34 agent_names
35 .iter()
36 .map(|name| model_budget_bytes_for_agent_name(name))
37 .min()
38 .unwrap_or(MAX_SAFE_PROMPT_SIZE)
39}
40
41fn truncate_diff_if_large(diff: &str, max_size: usize) -> String {
43 if diff.len() <= max_size {
44 return diff.to_string();
45 }
46
47 let mut files: Vec<DiffFile> = Vec::new();
48 let mut current_file = DiffFile::default();
49 let mut in_file = false;
50
51 for line in diff.lines() {
52 if line.starts_with("diff --git ") {
53 if in_file && !current_file.lines.is_empty() {
54 files.push(std::mem::take(&mut current_file));
55 }
56 in_file = true;
57 current_file.lines.push(line.to_string());
58
59 if let Some(path) = line.split(" b/").nth(1) {
60 current_file.path = path.to_string();
61 current_file.priority = prioritize_file_path(path);
62 }
63 } else if in_file {
64 current_file.lines.push(line.to_string());
65 }
66 }
67
68 if in_file && !current_file.lines.is_empty() {
69 files.push(current_file);
70 }
71
72 files.sort_by(|a, b| b.priority.cmp(&a.priority));
73
74 let mut result = String::new();
75 let mut current_size = 0;
76 let mut files_included = 0;
77 let total_files = files.len();
78
79 for file in &files {
80 let file_size: usize = file.lines.iter().map(|l| l.len() + 1).sum();
81
82 if current_size + file_size <= max_size {
83 for line in &file.lines {
84 result.push_str(line);
85 result.push('\n');
86 }
87 current_size += file_size;
88 files_included += 1;
89 } else if files_included == 0 {
90 let truncated_lines = truncate_lines_to_fit(&file.lines, max_size);
91 for line in truncated_lines {
92 result.push_str(&line);
93 result.push('\n');
94 }
95 files_included = 1;
96 break;
97 } else {
98 break;
99 }
100 }
101
102 if files_included < total_files {
103 let summary = format!(
104 "\n[Truncated: {files_included} of {total_files} files shown]\n"
105 );
106 if summary.len() <= max_size {
107 if result.len() + summary.len() > max_size {
108 let target_bytes = max_size.saturating_sub(summary.len());
109 if target_bytes < result.len() {
110 let mut cut = 0usize;
111 for (idx, _) in result.char_indices() {
112 if idx > target_bytes {
113 break;
114 }
115 cut = idx;
116 }
117 result.truncate(cut);
118 }
119 }
120 result.push_str(&summary);
121 }
122 }
123
124 result
125}
126
127#[must_use]
128pub fn truncate_diff_to_model_budget(diff: &str, max_size_bytes: u64) -> (String, bool) {
129 let max_size = usize::try_from(max_size_bytes).unwrap_or(usize::MAX);
130 if diff.len() <= max_size {
131 (diff.to_string(), false)
132 } else {
133 (truncate_diff_if_large(diff, max_size), true)
134 }
135}
136
/// One per-file section of a diff, as collected by `truncate_diff_if_large`.
///
/// The `Default` value has an empty path, priority `0` and no lines.
#[derive(Default)]
struct DiffFile {
    /// Score from `prioritize_file_path`; higher-scored sections are kept first.
    priority: i32,
    /// Text following the first ` b/` in the section's `diff --git ` header;
    /// left empty when the header contains no ` b/`.
    path: String,
    /// The header line followed by every line up to the next header, stored
    /// without line terminators.
    lines: Vec<String>,
}
143
/// Scores a file path; higher scores are kept first when a diff is truncated.
///
/// Backslashes are treated as `/` before the path is inspected. The first
/// matching rule wins:
///
/// * any path component equal to `src` -> `100`
/// * any path component equal to `tests` -> `50`
/// * extension `md` or `txt` (ASCII case-insensitive) -> `10`
/// * everything else -> `0`
fn prioritize_file_path(path: &str) -> i32 {
    let unified = path.replace('\\', "/");
    let has_component = |wanted: &str| unified.split('/').any(|segment| segment == wanted);

    if has_component("src") {
        return 100;
    }
    if has_component("tests") {
        return 50;
    }

    let is_plain_text = match std::path::Path::new(&unified).extension() {
        Some(ext) => ext.eq_ignore_ascii_case("md") || ext.eq_ignore_ascii_case("txt"),
        None => false,
    };

    if is_plain_text {
        10
    } else {
        0
    }
}
161
/// Shortens `s` in place to at most `max_bytes` bytes without splitting a
/// character.
///
/// A string that already fits is left untouched. Otherwise it is cut at the
/// largest character boundary that is not greater than `max_bytes`, so the
/// result can be shorter than `max_bytes` when that offset falls inside a
/// multi-byte character.
fn truncate_to_utf8_boundary(s: &mut String, max_bytes: usize) {
    if max_bytes >= s.len() {
        return;
    }

    // Walk back from the requested offset to the start of the character it
    // lands in. Offset 0 is always a boundary, so the loop terminates.
    let mut boundary = max_bytes;
    while !s.is_char_boundary(boundary) {
        boundary -= 1;
    }
    s.truncate(boundary);
}
175
176fn truncate_lines_to_fit(lines: &[String], max_size: usize) -> Vec<String> {
177 let mut result = Vec::new();
178 let mut current_size = 0;
179
180 for line in lines {
181 let line_size = line.len() + 1;
182 if current_size + line_size <= max_size {
183 current_size += line_size;
184 result.push(line.clone());
185 } else {
186 break;
187 }
188 }
189
190 let suffix = " [truncated...]";
191 let suffix_len = suffix.len();
192
193 if !result.is_empty() {
194 let mut total_size = current_size;
198 while !result.is_empty() && total_size + suffix_len > max_size {
199 let last_len = result.last().expect("checked non-empty").len();
200 let excess = total_size + suffix_len - max_size;
201 if excess < last_len {
202 let new_len = last_len - excess;
203 let last = result.last_mut().expect("checked non-empty");
204 truncate_to_utf8_boundary(last, new_len);
205 break;
206 }
207 let dropped = result.pop().expect("checked non-empty");
209 total_size = total_size.saturating_sub(dropped.len() + 1);
210 }
211
212 if let Some(last) = result.last_mut() {
213 last.push_str(suffix);
214 }
215 }
216
217 result
218}
219
#[cfg(test)]
mod diff_truncation_tests {
    use super::*;

    /// A leading crate directory must not hide the `src` / `tests` components.
    #[test]
    fn prioritize_file_path_handles_crate_prefixed_paths() {
        let expectations = [
            ("ralph-workflow/src/lib.rs", 100),
            ("ralph-workflow/tests/integration.rs", 50),
            ("README.md", 10),
        ];
        for &(path, expected) in expectations.iter() {
            assert_eq!(prioritize_file_path(path), expected);
        }
    }

    /// The first file fits the budget by itself, but the first file plus the
    /// footer does not, so the footer has to displace part of the kept text.
    #[test]
    fn truncate_diff_to_model_budget_never_exceeds_max_size() {
        let max_size = 1_000usize;

        let files_included = 1;
        let total_files = 2;
        let summary = format!(
            "\n[Truncated: {files_included} of {total_files} files shown]\n"
        );

        let file1_header = "diff --git a/src/a.rs b/src/a.rs";
        let desired_file1_size = max_size - summary.len() + 1;
        let filler_line_len = desired_file1_size.saturating_sub(file1_header.len() + 2);
        let filler = "x".repeat(filler_line_len.saturating_sub(1));

        let mut diff = format!("{file1_header}\n+{filler}\n");
        diff.push_str("diff --git a/tests/b.rs b/tests/b.rs\n+small\n");

        let (truncated, was_truncated) = truncate_diff_to_model_budget(&diff, max_size as u64);
        assert!(was_truncated, "expected truncation when diff exceeds max size");
        assert!(
            truncated.len() <= max_size,
            "truncated diff must not exceed max_size (got {} > {})",
            truncated.len(),
            max_size
        );
    }

    /// A single line that exactly fills the budget must still leave room for
    /// the truncation marker.
    #[test]
    fn truncate_lines_to_fit_reserves_space_for_truncation_suffix() {
        let max_size = 20usize;
        let only_line = "x".repeat(max_size - 1);

        let truncated = truncate_lines_to_fit(&[only_line], max_size);

        let total_size = truncated
            .iter()
            .fold(0usize, |bytes, line| bytes + line.len() + 1);
        assert!(
            total_size <= max_size,
            "truncate_lines_to_fit must not exceed max_size after adding suffix (got {total_size} > {max_size})"
        );
    }

    /// Budgets around the footer length, plus a few round sizes.
    #[test]
    fn truncate_diff_invariant_never_exceeds_max_size_edge_cases() {
        let summary_len = "\n[Truncated: 1 of 2 files shown]\n".len();
        let budgets = [
            10,
            summary_len - 1,
            summary_len,
            summary_len + 1,
            summary_len + 10,
            100,
            1000,
        ];
        let file2 = "diff --git a/tests/b.rs b/tests/b.rs\n+extra\n";

        for max_size in budgets {
            let mut diff = format!(
                "diff --git a/src/a.rs b/src/a.rs\n+{}\n",
                "x".repeat(max_size)
            );
            diff.push_str(file2);

            let (truncated, _) = truncate_diff_to_model_budget(&diff, max_size as u64);
            assert!(
                truncated.len() <= max_size,
                "truncated diff exceeded max_size {} (got {}): {:?}",
                max_size,
                truncated.len(),
                &truncated[..truncated.len().min(100)]
            );
        }
    }

    /// Exactly-at-budget input passes through; one byte over is truncated.
    #[test]
    fn truncate_diff_boundary_content_sizes() {
        let header = "diff --git a/a b/a\n+";

        for max_size in [50usize, 100, 200, 500] {
            let mut exact_diff = String::from(header);
            exact_diff.push_str(&"x".repeat(max_size.saturating_sub(header.len())));
            if exact_diff.len() == max_size {
                let (result, was_truncated) =
                    truncate_diff_to_model_budget(&exact_diff, max_size as u64);
                assert!(!was_truncated, "exact size should not trigger truncation");
                assert_eq!(result.len(), max_size);
            }

            let mut over_diff = String::from(header);
            over_diff.push_str(&"x".repeat(max_size + 1 - header.len()));
            let (result, was_truncated) =
                truncate_diff_to_model_budget(&over_diff, max_size as u64);
            assert!(was_truncated, "over size should trigger truncation");
            assert!(
                result.len() <= max_size,
                "truncated result {} should not exceed max_size {}",
                result.len(),
                max_size
            );
        }
    }

    /// One file three times larger than the budget.
    #[test]
    fn truncate_single_large_file_stays_within_budget() {
        let max_size = 100usize;
        let body = "x".repeat(max_size * 3);
        let large_file = format!("diff --git a/src/big.rs b/src/big.rs\n+{body}\n");

        let (truncated, was_truncated) =
            truncate_diff_to_model_budget(&large_file, max_size as u64);
        assert!(was_truncated, "large file should be truncated");
        assert!(
            truncated.len() <= max_size,
            "single large file truncation {} exceeded max_size {}",
            truncated.len(),
            max_size
        );
    }

    /// Multi-byte content must stay within budget and remain valid UTF-8.
    #[test]
    fn truncate_diff_handles_unicode_boundaries() {
        let max_size = 50usize;
        let emoji_line = "🎉".repeat(20);
        let diff = format!("diff --git a/a b/a\n+{emoji_line}\n");

        let (truncated, was_truncated) = truncate_diff_to_model_budget(&diff, max_size as u64);
        assert!(was_truncated, "unicode diff should be truncated");
        assert!(
            truncated.len() <= max_size,
            "unicode truncation {} exceeded max_size {}",
            truncated.len(),
            max_size
        );
        assert!(
            std::str::from_utf8(truncated.as_bytes()).is_ok(),
            "truncated output should be valid UTF-8"
        );
    }

    /// Empty input fits any budget.
    #[test]
    fn truncate_empty_diff() {
        let (result, was_truncated) = truncate_diff_to_model_budget("", 100);
        assert!(!was_truncated, "empty diff should not be truncated");
        assert_eq!(result, "");
    }

    /// Files are supplied lowest priority first to exercise the reordering.
    #[test]
    fn truncate_multiple_small_files_prefers_high_priority() {
        let max_size = 200usize;

        let src_file = "diff --git a/src/main.rs b/src/main.rs\n+high priority\n";
        let test_file = "diff --git a/tests/test.rs b/tests/test.rs\n+medium priority\n";
        let doc_file = "diff --git a/README.md b/README.md\n+low priority docs\n";
        let extra = "diff --git a/extra.rs b/extra.rs\n+extra content that exceeds budget\n";

        let diff = [doc_file, test_file, src_file, extra].concat();

        let (truncated, was_truncated) = truncate_diff_to_model_budget(&diff, max_size as u64);
        assert!(was_truncated, "should truncate when files exceed budget");
        assert!(
            truncated.len() <= max_size,
            "truncated {} exceeded max_size {}",
            truncated.len(),
            max_size
        );
        if truncated.contains("priority") {
            let kept_important =
                truncated.contains("high priority") || truncated.contains("medium priority");
            assert!(kept_important, "should prioritize src/tests over docs");
        }
    }
}