1use regex::Regex;
2use serde_json::Value;
3use std::sync::LazyLock;
4
/// Matches an opening or closing `<summary ...>` tag, case-insensitively.
/// Used to strip the tags from a line so only the summary text remains.
static SUMMARY_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?i)</?summary[^>]*>").unwrap());
/// Captures the (possibly empty) run of word characters that follows an
/// opening ``` fence at the start of a line, i.e. the language tag.
static LANG_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^```(\w*)").unwrap());
7
/// Returns `true` when `s` begins with `prefix`, ignoring ASCII case.
///
/// The comparison works on raw bytes, so it never panics even when
/// `prefix.len()` falls in the middle of a multi-byte character of `s`.
fn starts_with_ignore_ascii_case(s: &str, prefix: &str) -> bool {
    match s.as_bytes().get(..prefix.len()) {
        Some(head) => head.eq_ignore_ascii_case(prefix.as_bytes()),
        // `s` is shorter than `prefix`, so it cannot start with it.
        None => false,
    }
}
12
/// Clamps `pos` to `s.len()` and moves it backwards to the nearest UTF-8
/// character boundary, so that `&s[..result]` is always a valid slice.
///
/// Returns the adjusted byte index (possibly `0`).
pub fn safe_byte_index(s: &str, pos: usize) -> usize {
    let upper = pos.min(s.len());
    // Index 0 is always a boundary, so the search cannot come back empty.
    (0..=upper)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0)
}
23
/// Fenced code blocks with more inner lines than this are collapsed.
const CODE_BLOCK_MAX_LINES: usize = 20;
/// Number of lines kept at the start and at the end of a collapsed code block.
const CODE_BLOCK_KEEP: usize = 5;
/// Body limit applied to maintainer comments at tier 4 (compared against the byte length).
const MAINTAINER_LIMIT: usize = 5_000;
/// Body limit applied to non-maintainer comments at tier 2
/// (compared against the byte length, despite the name).
const COMMENT_PREVIEW_CHARS: usize = 500;
/// Maximum number of lines kept in a review-comment preview at tier 7.
const REVIEW_PREVIEW_LINES: usize = 3;
/// Maximum length of a review-comment preview at tier 7.
const REVIEW_PREVIEW_CHARS: usize = 300;
/// Patches with more lines than this are collapsed to a preview at tier 3.
const PATCH_INLINE_MAX_LINES: usize = 80;
/// Number of leading patch lines kept in the preview at tier 3 and in the per-item limit pass.
const PATCH_INLINE_KEEP: usize = 20;

/// `author_association` values whose comments are treated as maintainer comments.
const MAINTAINER_ROLES: &[&str] = &["OWNER", "MEMBER", "COLLABORATOR"];

/// Overall budget used by `compact_discussion` when the caller passes none.
/// It is compared against `estimate_size`; NOTE(review): the unit is whatever
/// `crate::github::estimate_json_size` returns — confirm.
const DEFAULT_BUDGET: usize = 60_000;
/// Per-item limit used by `compact_discussion` when the caller passes none;
/// compared against the byte length of the body, each patch and each comment body.
const DEFAULT_ITEM_BUDGET: usize = 15_000;
/// Fraction of the budget that is actually targeted (the effective budget).
const BUDGET_MARGIN: f64 = 0.90;

/// Patches with more lines than this are collapsed to a preview at tier 5.
const TIER5_PATCH_MAX_LINES: usize = 30;
/// Number of leading patch lines kept in the preview at tier 5.
const TIER5_PATCH_KEEP: usize = 15;
/// Body limit applied at tier 6.
const TIER6_BODY_LIMIT: usize = 5_000;
/// Body limit applied at tier 9.
const TIER9_BODY_LIMIT: usize = 2_000;
/// Comment body limit applied to all comments at tier 9.
const TIER9_COMMENT_LIMIT: usize = 200;
/// Maximum review-comment preview length at tier 9 (with a single preview line).
const TIER9_REVIEW_CHARS: usize = 150;

/// Threads with more comments than this (after bot filtering) are replaced by a digest.
const HUGE_THREAD_THRESHOLD: usize = 50;
/// Number of comments kept verbatim at the start of a digest.
const DIGEST_HEAD: usize = 5;
/// Number of comments kept verbatim at the end of a digest.
const DIGEST_TAIL: usize = 5;
/// Maximum number of maintainer comments from the omitted middle shown as highlights.
const DIGEST_MAINTAINER_MAX: usize = 15;
/// Byte length at which a maintainer highlight body is cut.
const DIGEST_MAINTAINER_CHARS: usize = 300;
61
62fn estimate_size(val: &Value) -> usize {
67 crate::github::estimate_json_size(val)
68}
69
70pub fn collapse_code_blocks_mut(text: &str, cache: &mut Option<Value>) -> String {
76 if text.is_empty() {
77 return text.to_string();
78 }
79
80 let lines: Vec<&str> = text.split('\n').collect();
81 let mut out: Vec<String> = Vec::new();
82 let mut i = 0;
83
84 while i < lines.len() {
85 let stripped = lines[i].trim();
86
87 if starts_with_ignore_ascii_case(stripped, "<details") {
89 let mut j = i + 1;
90 let mut summary = String::new();
91 while j < lines.len() {
92 let s = lines[j].trim();
93 if summary.is_empty() && starts_with_ignore_ascii_case(s, "<summary") {
94 summary = SUMMARY_RE.replace_all(s, "").trim().to_string();
95 }
96 if starts_with_ignore_ascii_case(s, "</details") {
97 break;
98 }
99 j += 1;
100 }
101 let hidden = if j > i { j - i - 1 } else { 0 };
102 if hidden > 3 {
103 let label = if summary.is_empty() {
104 "collapsed section".to_string()
105 } else {
106 summary
107 };
108 if let Some(ref mut c) = cache {
109 let n = c.get("_n").and_then(|v| v.as_u64()).unwrap_or(0) + 1;
110 c["_n"] = Value::from(n);
111 let eid = format!("details_{}", n);
112 let content: String = lines[(i + 1)..j].join("\n");
113 c[&eid] = serde_json::json!({
114 "type": "details",
115 "summary": label,
116 "total_lines": hidden,
117 "content": content,
118 });
119 out.push(format!("[{} — {} lines hidden, id:{}]", label, hidden, eid));
120 } else {
121 out.push(format!("[{} — {} lines hidden]", label, hidden));
122 }
123 i = (j + 1).min(lines.len());
124 continue;
125 }
126 }
127
128 if stripped.starts_with("```") {
130 let fence_line = lines[i];
131 let mut j = i + 1;
132 while j < lines.len() && !lines[j].trim().starts_with("```") {
133 j += 1;
134 }
135 let has_close = j < lines.len();
136 let end = if has_close { j + 1 } else { j };
137 let inner = end - i - if has_close { 2 } else { 1 };
138
139 if inner > CODE_BLOCK_MAX_LINES {
140 let hidden = inner - 2 * CODE_BLOCK_KEEP;
141
142 if let Some(ref mut c) = cache {
143 let n = c.get("_n").and_then(|v| v.as_u64()).unwrap_or(0) + 1;
144 c["_n"] = Value::from(n);
145 let eid = format!("cb_{}", n);
146 let lang = LANG_RE
147 .captures(fence_line.trim())
148 .and_then(|cap| cap.get(1))
149 .map(|m| m.as_str().to_string())
150 .unwrap_or_default();
151 let content_end = if has_close { j } else { end };
152 let content: String = lines[(i + 1)..content_end].join("\n");
153 c[&eid] = serde_json::json!({
154 "type": "code_block",
155 "language": lang,
156 "total_lines": inner,
157 "content": content,
158 });
159 out.push(format!("{} [id:{}, {} lines]", fence_line, eid, inner));
160 } else {
161 out.push(fence_line.to_string());
162 }
163
164 for line in lines
166 .iter()
167 .take((i + 1 + CODE_BLOCK_KEEP).min(lines.len()))
168 .skip(i + 1)
169 {
170 out.push(line.to_string());
171 }
172 out.push(format!(" ... ({} lines hidden)", hidden));
173
174 if has_close {
176 let start = j.saturating_sub(CODE_BLOCK_KEEP);
177 for line in lines.iter().take(j).skip(start) {
178 out.push(line.to_string());
179 }
180 out.push(lines[j].to_string());
181 } else {
182 let start = end.saturating_sub(CODE_BLOCK_KEEP);
183 for line in lines.iter().take(end).skip(start) {
184 out.push(line.to_string());
185 }
186 }
187 } else {
188 for line in lines.iter().take(end).skip(i) {
189 out.push(line.to_string());
190 }
191 }
192 i = end;
193 continue;
194 }
195
196 out.push(lines[i].to_string());
197 i += 1;
198 }
199
200 out.join("\n")
201}
202
203pub fn compact_text_mut(text: &str, limit: usize, cache: &mut Option<Value>) -> (String, bool) {
206 if text.is_empty() {
207 return (String::new(), false);
208 }
209 let collapsed = collapse_code_blocks_mut(text, cache);
210 if collapsed.len() > limit {
211 let truncated = format!(
212 "{}…[truncated]",
213 &collapsed[..safe_byte_index(&collapsed, limit)]
214 );
215 (truncated, true)
216 } else {
217 (collapsed, false)
218 }
219}
220
221fn filter_bot_comments(result: &mut Value) -> usize {
227 if let Some(comments) = result.get_mut("comments").and_then(|v| v.as_array_mut()) {
228 let original_len = comments.len();
229 comments.retain(|c| {
230 c.get("author")
231 .and_then(|a| a.as_str())
232 .map(|a| !a.ends_with("[bot]"))
233 .unwrap_or(true)
234 });
235 let bot_count = original_len - comments.len();
236 if bot_count > 0 {
237 result["_bot_comments_hidden"] = Value::from(bot_count as u64);
238 }
239 bot_count
240 } else {
241 0
242 }
243}
244
245fn collapse_body_code_blocks(result: &mut Value, cache: &mut Option<Value>) {
247 if let Some(body) = result
248 .get("body")
249 .and_then(|v| v.as_str())
250 .map(|s| s.to_string())
251 {
252 let collapsed = collapse_code_blocks_mut(&body, cache);
253 result["body"] = Value::String(collapsed);
254 }
255}
256
257fn collapse_comment_code_blocks(result: &mut Value, cache: &mut Option<Value>) {
259 if let Some(comments) = result.get_mut("comments").and_then(|v| v.as_array_mut()) {
260 for c in comments.iter_mut() {
261 if let Some(body) = c
262 .get("body")
263 .and_then(|v| v.as_str())
264 .map(|s| s.to_string())
265 {
266 let collapsed = collapse_code_blocks_mut(&body, cache);
267 c["body"] = Value::String(collapsed);
268 }
269 }
270 }
271}
272
273fn truncate_non_maintainer_comments(result: &mut Value, limit: usize, cache: &mut Option<Value>) {
275 if let Some(comments) = result.get_mut("comments").and_then(|v| v.as_array_mut()) {
276 for c in comments.iter_mut() {
277 let is_maintainer = c
278 .get("author_association")
279 .and_then(|a| a.as_str())
280 .map(|a| MAINTAINER_ROLES.contains(&a))
281 .unwrap_or(false);
282 if is_maintainer {
283 continue;
284 }
285 truncate_comment(c, limit, cache);
286 }
287 }
288}
289
290fn collapse_patches_over(
292 result: &mut Value,
293 max_lines: usize,
294 keep_lines: usize,
295 cache: &mut Option<Value>,
296) {
297 if let Some(files) = result.get_mut("files").and_then(|v| v.as_array_mut()) {
298 for f in files.iter_mut() {
299 if let Some(obj) = f.as_object_mut() {
300 let patch_text = match obj.get("patch").and_then(|v| v.as_str()) {
301 Some(p) if !p.is_empty() => p.to_string(),
302 _ => continue,
303 };
304 let total_lines = patch_text.matches('\n').count() + 1;
305 if total_lines <= max_lines {
306 continue;
307 }
308
309 let filename = obj
310 .get("filename")
311 .and_then(|v| v.as_str())
312 .unwrap_or("")
313 .to_string();
314 let additions = obj.get("additions").and_then(|v| v.as_u64()).unwrap_or(0);
315 let deletions = obj.get("deletions").and_then(|v| v.as_u64()).unwrap_or(0);
316
317 let eid = ensure_patch_cached(
318 obj,
319 &patch_text,
320 &filename,
321 additions,
322 deletions,
323 total_lines,
324 cache,
325 );
326
327 obj.remove("patch");
328 let preview: String = patch_text
329 .split('\n')
330 .take(keep_lines)
331 .collect::<Vec<_>>()
332 .join("\n");
333 obj.insert(
334 "patch_preview".to_string(),
335 Value::String(format!(
336 "{}\n\n... [{} more lines]",
337 preview,
338 total_lines - keep_lines
339 )),
340 );
341 if let Some(eid) = eid {
342 obj.insert("patch_id".to_string(), Value::String(eid));
343 }
344 }
345 }
346 }
347}
348
349fn truncate_maintainer_comments(result: &mut Value, limit: usize, cache: &mut Option<Value>) {
351 if let Some(comments) = result.get_mut("comments").and_then(|v| v.as_array_mut()) {
352 for c in comments.iter_mut() {
353 let is_maintainer = c
354 .get("author_association")
355 .and_then(|a| a.as_str())
356 .map(|a| MAINTAINER_ROLES.contains(&a))
357 .unwrap_or(false);
358 if !is_maintainer {
359 continue;
360 }
361 truncate_comment(c, limit, cache);
362 }
363 }
364}
365
366fn truncate_body(result: &mut Value, limit: usize, cache: &mut Option<Value>) {
368 if let Some(body) = result
369 .get("body")
370 .and_then(|v| v.as_str())
371 .map(|s| s.to_string())
372 {
373 if body.len() <= limit {
374 return;
375 }
376 let (compacted, truncated) = compact_text_mut(&body, limit, cache);
377 result["body"] = Value::String(compacted);
378 if truncated {
379 result["_body_truncated"] = Value::Bool(true);
380 }
381 }
382}
383
384fn compact_reviews(
386 result: &mut Value,
387 preview_lines: usize,
388 preview_chars: usize,
389 cache: &mut Option<Value>,
390) {
391 if let Some(reviews) = result.get_mut("reviews").and_then(|v| v.as_array_mut()) {
392 for review in reviews.iter_mut() {
393 let reviewer = review
394 .get("author")
395 .and_then(|a| a.as_str())
396 .unwrap_or("")
397 .to_string();
398 if let Some(inlines) = review
399 .get("inline_comments")
400 .and_then(|v| v.as_array())
401 .cloned()
402 {
403 if !inlines.is_empty() {
404 let compacted: Vec<Value> = inlines
405 .iter()
406 .map(|ic| {
407 compact_single_review_comment(
408 ic,
409 &reviewer,
410 preview_lines,
411 preview_chars,
412 cache,
413 )
414 })
415 .collect();
416 review["inline_comments"] = Value::Array(compacted);
417 }
418 }
419 }
420 }
421}
422
423fn remove_inline_patches(result: &mut Value, cache: &mut Option<Value>) {
425 if let Some(files) = result.get_mut("files").and_then(|v| v.as_array_mut()) {
426 for f in files.iter_mut() {
427 if let Some(obj) = f.as_object_mut() {
428 if let Some(patch_text) = obj
429 .get("patch")
430 .and_then(|v| v.as_str())
431 .map(|s| s.to_string())
432 {
433 if !patch_text.is_empty() {
434 let filename = obj
435 .get("filename")
436 .and_then(|v| v.as_str())
437 .unwrap_or("")
438 .to_string();
439 let additions = obj.get("additions").and_then(|v| v.as_u64()).unwrap_or(0);
440 let deletions = obj.get("deletions").and_then(|v| v.as_u64()).unwrap_or(0);
441 let total_lines = patch_text.matches('\n').count() + 1;
442 ensure_patch_cached(
443 obj,
444 &patch_text,
445 &filename,
446 additions,
447 deletions,
448 total_lines,
449 cache,
450 );
451 }
452 }
453 obj.remove("patch");
454 obj.remove("patch_preview");
455 }
456 }
457 }
458}
459
460fn enforce_per_item_limits(
462 result: &mut Value,
463 item_budget: usize,
464 cache: &mut Option<Value>,
465) -> Vec<String> {
466 let mut actions: Vec<String> = Vec::new();
467
468 if let Some(body) = result.get("body").and_then(|v| v.as_str()).map(|s| s.len()) {
470 if body > item_budget {
471 truncate_body(result, item_budget, cache);
472 actions.push("body truncated (over per-item limit)".into());
473 }
474 }
475
476 let mut patches_capped = 0usize;
478 if let Some(files) = result.get_mut("files").and_then(|v| v.as_array_mut()) {
479 for f in files.iter_mut() {
480 if let Some(obj) = f.as_object_mut() {
481 let patch_len = obj
482 .get("patch")
483 .and_then(|v| v.as_str())
484 .map(|s| s.len())
485 .unwrap_or(0);
486 if patch_len > item_budget {
487 let patch_text = obj.remove("patch").unwrap();
488 let patch_str = patch_text.as_str().unwrap_or("");
489 let total_lines = patch_str.matches('\n').count() + 1;
490 let filename = obj
491 .get("filename")
492 .and_then(|v| v.as_str())
493 .unwrap_or("")
494 .to_string();
495 let additions = obj.get("additions").and_then(|v| v.as_u64()).unwrap_or(0);
496 let deletions = obj.get("deletions").and_then(|v| v.as_u64()).unwrap_or(0);
497
498 let eid = ensure_patch_cached(
499 obj,
500 patch_str,
501 &filename,
502 additions,
503 deletions,
504 total_lines,
505 cache,
506 );
507
508 let preview: String = patch_str
509 .split('\n')
510 .take(PATCH_INLINE_KEEP)
511 .collect::<Vec<_>>()
512 .join("\n");
513 obj.insert(
514 "patch_preview".to_string(),
515 Value::String(format!(
516 "{}\n\n... [{} more lines]",
517 preview,
518 total_lines.saturating_sub(PATCH_INLINE_KEEP)
519 )),
520 );
521 if let Some(eid) = eid {
522 obj.insert("patch_id".to_string(), Value::String(eid));
523 }
524 patches_capped += 1;
525 }
526 }
527 }
528 }
529 if patches_capped > 0 {
530 actions.push(format!(
531 "{} large patch(es) collapsed (over per-item limit)",
532 patches_capped
533 ));
534 }
535
536 if let Some(comments) = result.get_mut("comments").and_then(|v| v.as_array_mut()) {
538 for c in comments.iter_mut() {
539 let body_len = c
540 .get("body")
541 .and_then(|v| v.as_str())
542 .map(|s| s.len())
543 .unwrap_or(0);
544 if body_len > item_budget {
545 truncate_comment(c, item_budget, cache);
546 }
547 }
548 }
549
550 actions
551}
552
553fn truncate_comment(c: &mut Value, limit: usize, cache: &mut Option<Value>) {
559 if c.get("_truncated")
560 .and_then(|v| v.as_bool())
561 .unwrap_or(false)
562 {
563 return; }
565 let original_body = c
566 .get("body")
567 .and_then(|b| b.as_str())
568 .unwrap_or("")
569 .to_string();
570 if original_body.len() <= limit {
571 return;
572 }
573 let (compacted, truncated) = compact_text_mut(&original_body, limit, cache);
574 c["body"] = Value::String(compacted);
575 if truncated {
576 c["_truncated"] = Value::Bool(true);
577 if let Some(ref mut cv) = cache {
578 let n = cv.get("_n").and_then(|v| v.as_u64()).unwrap_or(0);
579 let eid = format!("comment_{}", n);
580 cv[&eid] = serde_json::json!({
581 "type": "comment",
582 "author": c.get("author").and_then(|a| a.as_str()).unwrap_or(""),
583 "total_lines": original_body.matches('\n').count() + 1,
584 "content": original_body,
585 });
586 c["_element_id"] = Value::String(eid);
587 }
588 }
589}
590
591fn ensure_patch_cached(
593 obj: &mut serde_json::Map<String, Value>,
594 patch_text: &str,
595 filename: &str,
596 additions: u64,
597 deletions: u64,
598 total_lines: usize,
599 cache: &mut Option<Value>,
600) -> Option<String> {
601 if let Some(existing) = obj.get("patch_id").and_then(|v| v.as_str()) {
602 return Some(existing.to_string());
603 }
604 if let Some(ref mut c) = cache {
605 let n = c.get("_n").and_then(|v| v.as_u64()).unwrap_or(0) + 1;
606 c["_n"] = Value::from(n);
607 let eid = format!("patch_{}", n);
608 c[&eid] = serde_json::json!({
609 "type": "patch",
610 "filename": filename,
611 "additions": additions,
612 "deletions": deletions,
613 "total_lines": total_lines,
614 "content": patch_text,
615 });
616 Some(eid)
617 } else {
618 None
619 }
620}
621
622fn compact_single_review_comment(
624 ic: &Value,
625 reviewer: &str,
626 preview_lines: usize,
627 preview_chars: usize,
628 cache: &mut Option<Value>,
629) -> Value {
630 let body = ic.get("body").and_then(|b| b.as_str()).unwrap_or("");
631 let preview: String = {
632 let lines: Vec<&str> = body.split('\n').collect();
633 let kept: String = lines[..lines.len().min(preview_lines)].join("\n");
634 if kept.len() > preview_chars {
635 let mut s: String = kept.chars().take(preview_chars).collect();
636 s.push_str("...");
637 s
638 } else if lines.len() > preview_lines {
639 format!("{}...", kept)
640 } else {
641 kept
642 }
643 };
644 let replies = ic
645 .get("replies")
646 .and_then(|r| r.as_array())
647 .map(|r| r.len())
648 .unwrap_or(0);
649 let path = ic.get("path").and_then(|p| p.as_str()).unwrap_or("");
650
651 let eid = if let Some(ref mut c) = cache {
652 let n = c.get("_n").and_then(|v| v.as_u64()).unwrap_or(0) + 1;
653 c["_n"] = Value::from(n);
654 let eid = format!("review_{}", n);
655 c[&eid] = serde_json::json!({
656 "type": "review_comment",
657 "author": reviewer,
658 "path": path,
659 "line": ic.get("line"),
660 "total_lines": body.matches('\n').count() + 1,
661 "content": body,
662 "replies": ic.get("replies"),
663 });
664 Some(eid)
665 } else {
666 None
667 };
668
669 let mut entry = serde_json::json!({
670 "path": path,
671 "line": ic.get("line"),
672 "preview": preview,
673 "replies": replies,
674 });
675 if let Some(eid) = eid {
676 entry["_element_id"] = Value::String(eid);
677 }
678 entry
679}
680
681fn build_thread_digest(result: &mut Value, cache: &mut Option<Value>) -> String {
689 let comments = match result.get_mut("comments").and_then(|v| v.as_array_mut()) {
690 Some(c) => c,
691 None => return String::new(),
692 };
693
694 let total = comments.len();
695 let head = DIGEST_HEAD.min(total);
696 let tail = DIGEST_TAIL.min(total.saturating_sub(head));
697 let middle_start = head;
698 let middle_end = total.saturating_sub(tail);
699
700 if middle_start >= middle_end {
701 return String::new(); }
703
704 let middle_comments: Vec<Value> = comments[middle_start..middle_end].to_vec();
706 let middle_count = middle_comments.len();
707
708 let mut maintainer_highlights: Vec<Value> = Vec::new();
710 let mut maintainer_total = 0usize;
711 for (i, c) in middle_comments.iter().enumerate() {
712 let eid = format!("comment_{}", middle_start + i);
713
714 if let Some(ref mut cache_obj) = cache {
716 if let Some(obj) = cache_obj.as_object_mut() {
717 let mut cached = c.clone();
718 cached["_index"] = Value::Number((middle_start + i).into());
719 obj.insert(
720 eid.clone(),
721 serde_json::json!({
722 "type": "comment",
723 "content": cached,
724 }),
725 );
726 }
727 }
728
729 let assoc = c
730 .get("author_association")
731 .and_then(|v| v.as_str())
732 .unwrap_or("");
733 if MAINTAINER_ROLES.contains(&assoc) {
734 maintainer_total += 1;
735 if maintainer_highlights.len() < DIGEST_MAINTAINER_MAX {
736 let mut highlight = c.clone();
737 if let Some(body) = highlight.get("body").and_then(|v| v.as_str()) {
739 if body.len() > DIGEST_MAINTAINER_CHARS {
740 let cut = safe_byte_index(body, DIGEST_MAINTAINER_CHARS);
741 highlight["body"] = Value::String(format!("{}…", &body[..cut]));
742 }
743 }
744 highlight["_element_id"] = Value::String(eid);
745 highlight["_index"] = Value::Number((middle_start + i).into());
746 maintainer_highlights.push(highlight);
747 }
748 }
749 }
750
751 let first_date = middle_comments
753 .first()
754 .and_then(|c| c.get("created_at"))
755 .and_then(|v| v.as_str())
756 .unwrap_or("?");
757 let last_date = middle_comments
758 .last()
759 .and_then(|c| c.get("created_at"))
760 .and_then(|v| v.as_str())
761 .unwrap_or("?");
762
763 let head_comments: Vec<Value> = comments[..head].to_vec();
765 let tail_comments: Vec<Value> = comments[total - tail..].to_vec();
766
767 let mut digest: Vec<Value> = Vec::new();
768 digest.extend(head_comments);
769
770 let gap_msg = format!(
772 "--- {middle_count} comments omitted ({maintainer_total} from maintainers). \
773 Date range: {first_date} to {last_date}. \
774 Use element_id='comments_middle' with grep='pattern' to search. ---"
775 );
776 digest.push(serde_json::json!({
777 "author": "[system]",
778 "body": gap_msg,
779 }));
780
781 if !maintainer_highlights.is_empty() {
783 digest.extend(maintainer_highlights);
784 digest.push(serde_json::json!({
785 "author": "[system]",
786 "body": "--- end maintainer highlights, recent comments follow ---",
787 }));
788 }
789
790 digest.extend(tail_comments);
791
792 if let Some(ref mut cache_obj) = cache {
794 if let Some(obj) = cache_obj.as_object_mut() {
795 let indexed: Vec<Value> = middle_comments
796 .into_iter()
797 .enumerate()
798 .map(|(i, mut c)| {
799 c["_index"] = Value::Number((middle_start + i).into());
800 c
801 })
802 .collect();
803 obj.insert(
804 "comments_middle".to_string(),
805 serde_json::json!({
806 "type": "comment_segment",
807 "label": "middle",
808 "comment_count": middle_count,
809 "content": Value::Array(indexed),
810 }),
811 );
812 }
813 }
814
815 *comments = digest;
817
818 let comment_count = result
819 .get("comment_count")
820 .and_then(|v| v.as_u64())
821 .unwrap_or(total as u64);
822
823 format!(
824 "thread digest ({} total comments, {} shown inline, {} maintainer highlights)",
825 comment_count,
826 head + tail,
827 maintainer_total.min(DIGEST_MAINTAINER_MAX),
828 )
829}
830
831fn compact_discussion_internal(
840 result: &mut Value,
841 cache: &mut Option<Value>,
842 budget: usize,
843 item_budget: usize,
844) -> Vec<String> {
845 let effective_budget = (budget as f64 * BUDGET_MARGIN) as usize;
846
847 let mut compacted_sections: Vec<String> = Vec::new();
849
850 let bot_count = filter_bot_comments(result);
852 if bot_count > 0 {
853 compacted_sections.push(format!("{} bot comments filtered", bot_count));
854 }
855
856 collapse_body_code_blocks(result, cache);
858
859 let comment_count = result
861 .get("comments")
862 .and_then(|v| v.as_array())
863 .map(|a| a.len())
864 .unwrap_or(0);
865
866 if comment_count > HUGE_THREAD_THRESHOLD {
867 let digest_desc = build_thread_digest(result, cache);
868 if !digest_desc.is_empty() {
869 compacted_sections.push(digest_desc);
870 }
871 }
872
873 let item_actions = enforce_per_item_limits(result, item_budget, cache);
875 compacted_sections.extend(item_actions);
876
877 let mut size = estimate_size(result);
879 if size <= effective_budget {
880 cache_all_patches(result, cache);
882 if !compacted_sections.is_empty() {
883 result["_compaction"] = Value::String(format!(
884 "{}. Use element_id to drill down.",
885 compacted_sections.join("; ")
886 ));
887 }
888 return compacted_sections;
889 }
890
891 let mut tier_reached: u8 = 0;
893
894 if size > effective_budget {
896 tier_reached = 1;
897 collapse_comment_code_blocks(result, cache);
898 compacted_sections.push("code blocks collapsed".into());
899 size = estimate_size(result);
900 }
901
902 if size > effective_budget {
904 tier_reached = 2;
905 truncate_non_maintainer_comments(result, COMMENT_PREVIEW_CHARS, cache);
906 compacted_sections.push("non-maintainer comments truncated".into());
907 size = estimate_size(result);
908 }
909
910 if size > effective_budget {
912 tier_reached = 3;
913 collapse_patches_over(result, PATCH_INLINE_MAX_LINES, PATCH_INLINE_KEEP, cache);
914 compacted_sections.push("large patches (>80 lines) collapsed".into());
915 size = estimate_size(result);
916 }
917
918 if size > effective_budget {
920 tier_reached = 4;
921 truncate_maintainer_comments(result, MAINTAINER_LIMIT, cache);
922 compacted_sections.push("maintainer comments truncated".into());
923 size = estimate_size(result);
924 }
925
926 if size > effective_budget {
928 tier_reached = 5;
929 collapse_patches_over(result, TIER5_PATCH_MAX_LINES, TIER5_PATCH_KEEP, cache);
930 compacted_sections.push("medium patches (>30 lines) collapsed".into());
931 size = estimate_size(result);
932 }
933
934 if size > effective_budget {
936 tier_reached = 6;
937 truncate_body(result, TIER6_BODY_LIMIT, cache);
938 compacted_sections.push("PR body truncated".into());
939 size = estimate_size(result);
940 }
941
942 if size > effective_budget {
944 tier_reached = 7;
945 compact_reviews(result, REVIEW_PREVIEW_LINES, REVIEW_PREVIEW_CHARS, cache);
946 compacted_sections.push("review comments compacted".into());
947 size = estimate_size(result);
948 }
949
950 if size > effective_budget {
952 tier_reached = 8;
953 remove_inline_patches(result, cache);
954 compacted_sections.push("all patches removed (use patch_id to drill down)".into());
955 size = estimate_size(result);
956 }
957
958 if size > effective_budget {
960 tier_reached = 9;
961 truncate_body(result, TIER9_BODY_LIMIT, cache);
962 truncate_non_maintainer_comments(result, TIER9_COMMENT_LIMIT, cache);
963 truncate_maintainer_comments(result, TIER9_COMMENT_LIMIT, cache);
964 compact_reviews(result, 1, TIER9_REVIEW_CHARS, cache);
965 compacted_sections.push("aggressive compaction applied".into());
966 let _ = estimate_size(result);
967 }
968
969 cache_all_patches(result, cache);
971
972 if tier_reached > 0 {
974 result["_compaction"] = Value::String(format!(
975 "Budget compaction (tier {}). {}. Use element_id to drill down.",
976 tier_reached,
977 compacted_sections.join("; ")
978 ));
979 }
980
981 compacted_sections
982}
983
984fn cache_all_patches(result: &mut Value, cache: &mut Option<Value>) {
986 if let Some(files) = result.get_mut("files").and_then(|v| v.as_array_mut()) {
987 for f in files.iter_mut() {
988 if let Some(obj) = f.as_object_mut() {
989 if obj.contains_key("patch_id") {
990 continue;
991 }
992 if let Some(patch_text) = obj
993 .get("patch")
994 .and_then(|v| v.as_str())
995 .map(|s| s.to_string())
996 {
997 if patch_text.is_empty() {
998 continue;
999 }
1000 let filename = obj
1001 .get("filename")
1002 .and_then(|v| v.as_str())
1003 .unwrap_or("")
1004 .to_string();
1005 let additions = obj.get("additions").and_then(|v| v.as_u64()).unwrap_or(0);
1006 let deletions = obj.get("deletions").and_then(|v| v.as_u64()).unwrap_or(0);
1007 let total_lines = patch_text.matches('\n').count() + 1;
1008 let eid = ensure_patch_cached(
1009 obj,
1010 &patch_text,
1011 &filename,
1012 additions,
1013 deletions,
1014 total_lines,
1015 cache,
1016 );
1017 if let Some(eid) = eid {
1018 obj.insert("patch_id".to_string(), Value::String(eid));
1019 }
1020 }
1021 }
1022 }
1023 }
1024}
1025
1026pub fn collapse_code_blocks(text: &str, cache_json: Option<&str>) -> (String, Option<String>) {
1035 let mut cache: Option<Value> = cache_json.and_then(|s| serde_json::from_str(s).ok());
1036 let result = collapse_code_blocks_mut(text, &mut cache);
1037 let cache_out = cache.map(|c| serde_json::to_string(&c).unwrap_or_default());
1038 (result, cache_out)
1039}
1040
1041pub fn compact_text(
1044 text: &str,
1045 limit: usize,
1046 cache_json: Option<&str>,
1047) -> (String, bool, Option<String>) {
1048 let mut cache: Option<Value> = cache_json.and_then(|s| serde_json::from_str(s).ok());
1049 let (result, truncated) = compact_text_mut(text, limit, &mut cache);
1050 let cache_out = cache.map(|c| serde_json::to_string(&c).unwrap_or_default());
1051 (result, truncated, cache_out)
1052}
1053
1054pub fn compact_discussion(
1061 discussion_json: &str,
1062 cache_json: Option<&str>,
1063 budget: Option<usize>,
1064 item_budget: Option<usize>,
1065) -> Result<(String, Option<String>), String> {
1066 let mut result: Value =
1067 serde_json::from_str(discussion_json).map_err(|e| format!("Invalid JSON: {}", e))?;
1068
1069 let mut cache: Option<Value> = cache_json.and_then(|s| serde_json::from_str(s).ok());
1070
1071 let budget = budget.unwrap_or(DEFAULT_BUDGET);
1072 let item_budget = item_budget.unwrap_or(DEFAULT_ITEM_BUDGET);
1073
1074 compact_discussion_internal(&mut result, &mut cache, budget, item_budget);
1075
1076 let out = serde_json::to_string_pretty(&result).unwrap_or_default();
1077 let cache_out = cache.map(|c| serde_json::to_string(&c).unwrap_or_default());
1078 Ok((out, cache_out))
1079}