Skip to main content

mcp_methods/
compact.rs

1use regex::Regex;
2use serde_json::Value;
3use std::sync::LazyLock;
4
5static SUMMARY_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"(?i)</?summary[^>]*>").unwrap());
6static LANG_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^```(\w*)").unwrap());
7
/// Reports whether `s` begins with `prefix`, treating ASCII letters case-insensitively.
///
/// The comparison works on raw bytes, so `s` is never sliced as a `str` and multi-byte
/// UTF-8 input cannot cause a char-boundary panic.
fn starts_with_ignore_ascii_case(s: &str, prefix: &str) -> bool {
    let needle = prefix.as_bytes();
    match s.as_bytes().get(..needle.len()) {
        Some(head) => head.eq_ignore_ascii_case(needle),
        // `s` is shorter than `prefix`.
        None => false,
    }
}
12
/// Returns the greatest char boundary of `s` that is `<= pos`.
///
/// `pos` is clamped to `s.len()` first, so the result is always a valid end index
/// for slicing `s` (`&s[..result]`).
pub fn safe_byte_index(s: &str, pos: usize) -> usize {
    let upper = pos.min(s.len());
    // Offset 0 is always a char boundary, so the search always finds something.
    (0..=upper)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0)
}
23
24// ---------------------------------------------------------------------------
25// Compaction constants
26// ---------------------------------------------------------------------------
27
/// A fenced code block is collapsed only when it has more than this many inner lines.
const CODE_BLOCK_MAX_LINES: usize = 20;
/// Number of lines kept at the start and again at the end of a collapsed code block.
const CODE_BLOCK_KEEP: usize = 5;
/// Size limit for maintainer comments.
/// NOTE(review): not referenced in the visible part of this file; presumably the limit
/// handed to the maintainer-comment truncation tier — confirm against the caller.
const MAINTAINER_LIMIT: usize = 5_000;
/// Size limit for regular comment previews.
/// NOTE(review): not referenced in the visible part of this file — confirm usage.
const COMMENT_PREVIEW_CHARS: usize = 500;
/// Line limit for review-comment previews.
/// NOTE(review): presumably passed as `preview_lines` to `compact_reviews` — confirm.
const REVIEW_PREVIEW_LINES: usize = 3;
/// Character limit for review-comment previews.
/// NOTE(review): presumably passed as `preview_chars` to `compact_reviews` — confirm.
const REVIEW_PREVIEW_CHARS: usize = 300;
/// Individual patch collapse: patches above this are collapsed even in small diffs.
const PATCH_INLINE_MAX_LINES: usize = 80;
/// How many lines to keep as preview when collapsing an inline patch.
const PATCH_INLINE_KEEP: usize = 20;

/// `author_association` values that mark a comment author as a maintainer.
const MAINTAINER_ROLES: &[&str] = &["OWNER", "MEMBER", "COLLABORATOR"];
40
41// Budget constants
/// Default overall size budget.
/// NOTE(review): not referenced in the visible part of this file — confirm the unit
/// (it is compared against `estimate_size` results elsewhere, presumably).
const DEFAULT_BUDGET: usize = 60_000;
/// Default size budget for any single item (body, patch or comment).
/// NOTE(review): not referenced in the visible part of this file — confirm usage.
const DEFAULT_ITEM_BUDGET: usize = 15_000;
/// Safety margin: consider budget hit when within 10% of limit.
/// The effective budget is the requested budget multiplied by this factor.
const BUDGET_MARGIN: f64 = 0.90;
46
47// Tier-specific thresholds
// NOTE(review): none of the tier thresholds below are referenced in the visible part of
// this file; the descriptions are inferred from the names and should be confirmed
// against the tier loop that consumes them.

/// Tier 5: presumably the `max_lines` threshold passed to `collapse_patches_over`.
const TIER5_PATCH_MAX_LINES: usize = 30;
/// Tier 5: presumably the `keep_lines` preview size passed to `collapse_patches_over`.
const TIER5_PATCH_KEEP: usize = 15;
/// Tier 6: presumably the limit passed to `truncate_body`.
const TIER6_BODY_LIMIT: usize = 5_000;
/// Tier 9: presumably a tighter body limit.
const TIER9_BODY_LIMIT: usize = 2_000;
/// Tier 9: presumably a tighter comment limit.
const TIER9_COMMENT_LIMIT: usize = 200;
/// Tier 9: presumably a tighter review-preview character limit.
const TIER9_REVIEW_CHARS: usize = 150;
54
55// Thread digest constants (for huge discussions)
/// A thread digest is built when a discussion has more comments than this.
const HUGE_THREAD_THRESHOLD: usize = 50;
/// Number of leading comments the digest keeps verbatim.
const DIGEST_HEAD: usize = 5;
/// Number of trailing comments the digest keeps verbatim.
const DIGEST_TAIL: usize = 5;
/// Maximum number of maintainer comments from the omitted middle shown as highlights.
const DIGEST_MAINTAINER_MAX: usize = 15;
/// Maintainer highlight bodies longer than this are cut; the comparison uses the
/// body's byte length.
const DIGEST_MAINTAINER_CHARS: usize = 300;
61
62// ---------------------------------------------------------------------------
63// Size estimation (mirrors github.rs estimate_json_size)
64// ---------------------------------------------------------------------------
65
66fn estimate_size(val: &Value) -> usize {
67    crate::github::estimate_json_size(val)
68}
69
70// ---------------------------------------------------------------------------
71// Internal text helpers (unchanged from before)
72// ---------------------------------------------------------------------------
73
74/// Collapse large fenced code blocks and <details> sections, mutating cache in place.
75pub fn collapse_code_blocks_mut(text: &str, cache: &mut Option<Value>) -> String {
76    if text.is_empty() {
77        return text.to_string();
78    }
79
80    let lines: Vec<&str> = text.split('\n').collect();
81    let mut out: Vec<String> = Vec::new();
82    let mut i = 0;
83
84    while i < lines.len() {
85        let stripped = lines[i].trim();
86
87        // Collapse <details> blocks
88        if starts_with_ignore_ascii_case(stripped, "<details") {
89            let mut j = i + 1;
90            let mut summary = String::new();
91            while j < lines.len() {
92                let s = lines[j].trim();
93                if summary.is_empty() && starts_with_ignore_ascii_case(s, "<summary") {
94                    summary = SUMMARY_RE.replace_all(s, "").trim().to_string();
95                }
96                if starts_with_ignore_ascii_case(s, "</details") {
97                    break;
98                }
99                j += 1;
100            }
101            let hidden = if j > i { j - i - 1 } else { 0 };
102            if hidden > 3 {
103                let label = if summary.is_empty() {
104                    "collapsed section".to_string()
105                } else {
106                    summary
107                };
108                if let Some(ref mut c) = cache {
109                    let n = c.get("_n").and_then(|v| v.as_u64()).unwrap_or(0) + 1;
110                    c["_n"] = Value::from(n);
111                    let eid = format!("details_{}", n);
112                    let content: String = lines[(i + 1)..j].join("\n");
113                    c[&eid] = serde_json::json!({
114                        "type": "details",
115                        "summary": label,
116                        "total_lines": hidden,
117                        "content": content,
118                    });
119                    out.push(format!("[{} — {} lines hidden, id:{}]", label, hidden, eid));
120                } else {
121                    out.push(format!("[{} — {} lines hidden]", label, hidden));
122                }
123                i = (j + 1).min(lines.len());
124                continue;
125            }
126        }
127
128        // Collapse large fenced code blocks
129        if stripped.starts_with("```") {
130            let fence_line = lines[i];
131            let mut j = i + 1;
132            while j < lines.len() && !lines[j].trim().starts_with("```") {
133                j += 1;
134            }
135            let has_close = j < lines.len();
136            let end = if has_close { j + 1 } else { j };
137            let inner = end - i - if has_close { 2 } else { 1 };
138
139            if inner > CODE_BLOCK_MAX_LINES {
140                let hidden = inner - 2 * CODE_BLOCK_KEEP;
141
142                if let Some(ref mut c) = cache {
143                    let n = c.get("_n").and_then(|v| v.as_u64()).unwrap_or(0) + 1;
144                    c["_n"] = Value::from(n);
145                    let eid = format!("cb_{}", n);
146                    let lang = LANG_RE
147                        .captures(fence_line.trim())
148                        .and_then(|cap| cap.get(1))
149                        .map(|m| m.as_str().to_string())
150                        .unwrap_or_default();
151                    let content_end = if has_close { j } else { end };
152                    let content: String = lines[(i + 1)..content_end].join("\n");
153                    c[&eid] = serde_json::json!({
154                        "type": "code_block",
155                        "language": lang,
156                        "total_lines": inner,
157                        "content": content,
158                    });
159                    out.push(format!("{} [id:{}, {} lines]", fence_line, eid, inner));
160                } else {
161                    out.push(fence_line.to_string());
162                }
163
164                // Keep first CODE_BLOCK_KEEP lines
165                for line in lines
166                    .iter()
167                    .take((i + 1 + CODE_BLOCK_KEEP).min(lines.len()))
168                    .skip(i + 1)
169                {
170                    out.push(line.to_string());
171                }
172                out.push(format!("  ... ({} lines hidden)", hidden));
173
174                // Keep last CODE_BLOCK_KEEP lines + closing fence
175                if has_close {
176                    let start = j.saturating_sub(CODE_BLOCK_KEEP);
177                    for line in lines.iter().take(j).skip(start) {
178                        out.push(line.to_string());
179                    }
180                    out.push(lines[j].to_string());
181                } else {
182                    let start = end.saturating_sub(CODE_BLOCK_KEEP);
183                    for line in lines.iter().take(end).skip(start) {
184                        out.push(line.to_string());
185                    }
186                }
187            } else {
188                for line in lines.iter().take(end).skip(i) {
189                    out.push(line.to_string());
190                }
191            }
192            i = end;
193            continue;
194        }
195
196        out.push(lines[i].to_string());
197        i += 1;
198    }
199
200    out.join("\n")
201}
202
203/// Collapse code blocks then truncate if over limit, mutating cache in place.
204/// Returns (text, was_truncated).
205pub fn compact_text_mut(text: &str, limit: usize, cache: &mut Option<Value>) -> (String, bool) {
206    if text.is_empty() {
207        return (String::new(), false);
208    }
209    let collapsed = collapse_code_blocks_mut(text, cache);
210    if collapsed.len() > limit {
211        let truncated = format!(
212            "{}…[truncated]",
213            &collapsed[..safe_byte_index(&collapsed, limit)]
214        );
215        (truncated, true)
216    } else {
217        (collapsed, false)
218    }
219}
220
221// ---------------------------------------------------------------------------
222// Tier helper functions — each modifies `result` in place
223// ---------------------------------------------------------------------------
224
225/// Tier 0a: Filter bot comments. Returns count of bots removed.
226fn filter_bot_comments(result: &mut Value) -> usize {
227    if let Some(comments) = result.get_mut("comments").and_then(|v| v.as_array_mut()) {
228        let original_len = comments.len();
229        comments.retain(|c| {
230            c.get("author")
231                .and_then(|a| a.as_str())
232                .map(|a| !a.ends_with("[bot]"))
233                .unwrap_or(true)
234        });
235        let bot_count = original_len - comments.len();
236        if bot_count > 0 {
237            result["_bot_comments_hidden"] = Value::from(bot_count as u64);
238        }
239        bot_count
240    } else {
241        0
242    }
243}
244
245/// Tier 0b: Collapse code blocks in body text only (not truncation, just block collapsing).
246fn collapse_body_code_blocks(result: &mut Value, cache: &mut Option<Value>) {
247    if let Some(body) = result
248        .get("body")
249        .and_then(|v| v.as_str())
250        .map(|s| s.to_string())
251    {
252        let collapsed = collapse_code_blocks_mut(&body, cache);
253        result["body"] = Value::String(collapsed);
254    }
255}
256
257/// Tier 1: Collapse code blocks in all comment bodies.
258fn collapse_comment_code_blocks(result: &mut Value, cache: &mut Option<Value>) {
259    if let Some(comments) = result.get_mut("comments").and_then(|v| v.as_array_mut()) {
260        for c in comments.iter_mut() {
261            if let Some(body) = c
262                .get("body")
263                .and_then(|v| v.as_str())
264                .map(|s| s.to_string())
265            {
266                let collapsed = collapse_code_blocks_mut(&body, cache);
267                c["body"] = Value::String(collapsed);
268            }
269        }
270    }
271}
272
273/// Tier 2: Truncate non-maintainer comments.
274fn truncate_non_maintainer_comments(result: &mut Value, limit: usize, cache: &mut Option<Value>) {
275    if let Some(comments) = result.get_mut("comments").and_then(|v| v.as_array_mut()) {
276        for c in comments.iter_mut() {
277            let is_maintainer = c
278                .get("author_association")
279                .and_then(|a| a.as_str())
280                .map(|a| MAINTAINER_ROLES.contains(&a))
281                .unwrap_or(false);
282            if is_maintainer {
283                continue;
284            }
285            truncate_comment(c, limit, cache);
286        }
287    }
288}
289
290/// Tier 3/5: Collapse patches over a line threshold.
291fn collapse_patches_over(
292    result: &mut Value,
293    max_lines: usize,
294    keep_lines: usize,
295    cache: &mut Option<Value>,
296) {
297    if let Some(files) = result.get_mut("files").and_then(|v| v.as_array_mut()) {
298        for f in files.iter_mut() {
299            if let Some(obj) = f.as_object_mut() {
300                let patch_text = match obj.get("patch").and_then(|v| v.as_str()) {
301                    Some(p) if !p.is_empty() => p.to_string(),
302                    _ => continue,
303                };
304                let total_lines = patch_text.matches('\n').count() + 1;
305                if total_lines <= max_lines {
306                    continue;
307                }
308
309                let filename = obj
310                    .get("filename")
311                    .and_then(|v| v.as_str())
312                    .unwrap_or("")
313                    .to_string();
314                let additions = obj.get("additions").and_then(|v| v.as_u64()).unwrap_or(0);
315                let deletions = obj.get("deletions").and_then(|v| v.as_u64()).unwrap_or(0);
316
317                let eid = ensure_patch_cached(
318                    obj,
319                    &patch_text,
320                    &filename,
321                    additions,
322                    deletions,
323                    total_lines,
324                    cache,
325                );
326
327                obj.remove("patch");
328                let preview: String = patch_text
329                    .split('\n')
330                    .take(keep_lines)
331                    .collect::<Vec<_>>()
332                    .join("\n");
333                obj.insert(
334                    "patch_preview".to_string(),
335                    Value::String(format!(
336                        "{}\n\n... [{} more lines]",
337                        preview,
338                        total_lines - keep_lines
339                    )),
340                );
341                if let Some(eid) = eid {
342                    obj.insert("patch_id".to_string(), Value::String(eid));
343                }
344            }
345        }
346    }
347}
348
349/// Tier 4: Truncate maintainer comments.
350fn truncate_maintainer_comments(result: &mut Value, limit: usize, cache: &mut Option<Value>) {
351    if let Some(comments) = result.get_mut("comments").and_then(|v| v.as_array_mut()) {
352        for c in comments.iter_mut() {
353            let is_maintainer = c
354                .get("author_association")
355                .and_then(|a| a.as_str())
356                .map(|a| MAINTAINER_ROLES.contains(&a))
357                .unwrap_or(false);
358            if !is_maintainer {
359                continue;
360            }
361            truncate_comment(c, limit, cache);
362        }
363    }
364}
365
366/// Tier 6: Truncate PR body.
367fn truncate_body(result: &mut Value, limit: usize, cache: &mut Option<Value>) {
368    if let Some(body) = result
369        .get("body")
370        .and_then(|v| v.as_str())
371        .map(|s| s.to_string())
372    {
373        if body.len() <= limit {
374            return;
375        }
376        let (compacted, truncated) = compact_text_mut(&body, limit, cache);
377        result["body"] = Value::String(compacted);
378        if truncated {
379            result["_body_truncated"] = Value::Bool(true);
380        }
381    }
382}
383
384/// Tier 7: Compact review inline comments to previews.
385fn compact_reviews(
386    result: &mut Value,
387    preview_lines: usize,
388    preview_chars: usize,
389    cache: &mut Option<Value>,
390) {
391    if let Some(reviews) = result.get_mut("reviews").and_then(|v| v.as_array_mut()) {
392        for review in reviews.iter_mut() {
393            let reviewer = review
394                .get("author")
395                .and_then(|a| a.as_str())
396                .unwrap_or("")
397                .to_string();
398            if let Some(inlines) = review
399                .get("inline_comments")
400                .and_then(|v| v.as_array())
401                .cloned()
402            {
403                if !inlines.is_empty() {
404                    let compacted: Vec<Value> = inlines
405                        .iter()
406                        .map(|ic| {
407                            compact_single_review_comment(
408                                ic,
409                                &reviewer,
410                                preview_lines,
411                                preview_chars,
412                                cache,
413                            )
414                        })
415                        .collect();
416                    review["inline_comments"] = Value::Array(compacted);
417                }
418            }
419        }
420    }
421}
422
423/// Tier 8: Remove all inline patches, keep only file metadata.
424fn remove_inline_patches(result: &mut Value, cache: &mut Option<Value>) {
425    if let Some(files) = result.get_mut("files").and_then(|v| v.as_array_mut()) {
426        for f in files.iter_mut() {
427            if let Some(obj) = f.as_object_mut() {
428                if let Some(patch_text) = obj
429                    .get("patch")
430                    .and_then(|v| v.as_str())
431                    .map(|s| s.to_string())
432                {
433                    if !patch_text.is_empty() {
434                        let filename = obj
435                            .get("filename")
436                            .and_then(|v| v.as_str())
437                            .unwrap_or("")
438                            .to_string();
439                        let additions = obj.get("additions").and_then(|v| v.as_u64()).unwrap_or(0);
440                        let deletions = obj.get("deletions").and_then(|v| v.as_u64()).unwrap_or(0);
441                        let total_lines = patch_text.matches('\n').count() + 1;
442                        ensure_patch_cached(
443                            obj,
444                            &patch_text,
445                            &filename,
446                            additions,
447                            deletions,
448                            total_lines,
449                            cache,
450                        );
451                    }
452                }
453                obj.remove("patch");
454                obj.remove("patch_preview");
455            }
456        }
457    }
458}
459
460/// Per-item budget: cap any single oversized item. Returns descriptions of what was capped.
461fn enforce_per_item_limits(
462    result: &mut Value,
463    item_budget: usize,
464    cache: &mut Option<Value>,
465) -> Vec<String> {
466    let mut actions: Vec<String> = Vec::new();
467
468    // Cap body
469    if let Some(body) = result.get("body").and_then(|v| v.as_str()).map(|s| s.len()) {
470        if body > item_budget {
471            truncate_body(result, item_budget, cache);
472            actions.push("body truncated (over per-item limit)".into());
473        }
474    }
475
476    // Cap individual patches
477    let mut patches_capped = 0usize;
478    if let Some(files) = result.get_mut("files").and_then(|v| v.as_array_mut()) {
479        for f in files.iter_mut() {
480            if let Some(obj) = f.as_object_mut() {
481                let patch_len = obj
482                    .get("patch")
483                    .and_then(|v| v.as_str())
484                    .map(|s| s.len())
485                    .unwrap_or(0);
486                if patch_len > item_budget {
487                    let patch_text = obj.remove("patch").unwrap();
488                    let patch_str = patch_text.as_str().unwrap_or("");
489                    let total_lines = patch_str.matches('\n').count() + 1;
490                    let filename = obj
491                        .get("filename")
492                        .and_then(|v| v.as_str())
493                        .unwrap_or("")
494                        .to_string();
495                    let additions = obj.get("additions").and_then(|v| v.as_u64()).unwrap_or(0);
496                    let deletions = obj.get("deletions").and_then(|v| v.as_u64()).unwrap_or(0);
497
498                    let eid = ensure_patch_cached(
499                        obj,
500                        patch_str,
501                        &filename,
502                        additions,
503                        deletions,
504                        total_lines,
505                        cache,
506                    );
507
508                    let preview: String = patch_str
509                        .split('\n')
510                        .take(PATCH_INLINE_KEEP)
511                        .collect::<Vec<_>>()
512                        .join("\n");
513                    obj.insert(
514                        "patch_preview".to_string(),
515                        Value::String(format!(
516                            "{}\n\n... [{} more lines]",
517                            preview,
518                            total_lines.saturating_sub(PATCH_INLINE_KEEP)
519                        )),
520                    );
521                    if let Some(eid) = eid {
522                        obj.insert("patch_id".to_string(), Value::String(eid));
523                    }
524                    patches_capped += 1;
525                }
526            }
527        }
528    }
529    if patches_capped > 0 {
530        actions.push(format!(
531            "{} large patch(es) collapsed (over per-item limit)",
532            patches_capped
533        ));
534    }
535
536    // Cap individual comments
537    if let Some(comments) = result.get_mut("comments").and_then(|v| v.as_array_mut()) {
538        for c in comments.iter_mut() {
539            let body_len = c
540                .get("body")
541                .and_then(|v| v.as_str())
542                .map(|s| s.len())
543                .unwrap_or(0);
544            if body_len > item_budget {
545                truncate_comment(c, item_budget, cache);
546            }
547        }
548    }
549
550    actions
551}
552
553// ---------------------------------------------------------------------------
554// Shared helpers
555// ---------------------------------------------------------------------------
556
557/// Truncate a comment body in place, caching the original if truncated.
558fn truncate_comment(c: &mut Value, limit: usize, cache: &mut Option<Value>) {
559    if c.get("_truncated")
560        .and_then(|v| v.as_bool())
561        .unwrap_or(false)
562    {
563        return; // already truncated
564    }
565    let original_body = c
566        .get("body")
567        .and_then(|b| b.as_str())
568        .unwrap_or("")
569        .to_string();
570    if original_body.len() <= limit {
571        return;
572    }
573    let (compacted, truncated) = compact_text_mut(&original_body, limit, cache);
574    c["body"] = Value::String(compacted);
575    if truncated {
576        c["_truncated"] = Value::Bool(true);
577        if let Some(ref mut cv) = cache {
578            let n = cv.get("_n").and_then(|v| v.as_u64()).unwrap_or(0);
579            let eid = format!("comment_{}", n);
580            cv[&eid] = serde_json::json!({
581                "type": "comment",
582                "author": c.get("author").and_then(|a| a.as_str()).unwrap_or(""),
583                "total_lines": original_body.matches('\n').count() + 1,
584                "content": original_body,
585            });
586            c["_element_id"] = Value::String(eid);
587        }
588    }
589}
590
591/// Ensure a patch is cached, returning the element_id. Checks if patch_id already set.
592fn ensure_patch_cached(
593    obj: &mut serde_json::Map<String, Value>,
594    patch_text: &str,
595    filename: &str,
596    additions: u64,
597    deletions: u64,
598    total_lines: usize,
599    cache: &mut Option<Value>,
600) -> Option<String> {
601    if let Some(existing) = obj.get("patch_id").and_then(|v| v.as_str()) {
602        return Some(existing.to_string());
603    }
604    if let Some(ref mut c) = cache {
605        let n = c.get("_n").and_then(|v| v.as_u64()).unwrap_or(0) + 1;
606        c["_n"] = Value::from(n);
607        let eid = format!("patch_{}", n);
608        c[&eid] = serde_json::json!({
609            "type": "patch",
610            "filename": filename,
611            "additions": additions,
612            "deletions": deletions,
613            "total_lines": total_lines,
614            "content": patch_text,
615        });
616        Some(eid)
617    } else {
618        None
619    }
620}
621
622/// Compact a single review inline comment into a preview entry.
623fn compact_single_review_comment(
624    ic: &Value,
625    reviewer: &str,
626    preview_lines: usize,
627    preview_chars: usize,
628    cache: &mut Option<Value>,
629) -> Value {
630    let body = ic.get("body").and_then(|b| b.as_str()).unwrap_or("");
631    let preview: String = {
632        let lines: Vec<&str> = body.split('\n').collect();
633        let kept: String = lines[..lines.len().min(preview_lines)].join("\n");
634        if kept.len() > preview_chars {
635            let mut s: String = kept.chars().take(preview_chars).collect();
636            s.push_str("...");
637            s
638        } else if lines.len() > preview_lines {
639            format!("{}...", kept)
640        } else {
641            kept
642        }
643    };
644    let replies = ic
645        .get("replies")
646        .and_then(|r| r.as_array())
647        .map(|r| r.len())
648        .unwrap_or(0);
649    let path = ic.get("path").and_then(|p| p.as_str()).unwrap_or("");
650
651    let eid = if let Some(ref mut c) = cache {
652        let n = c.get("_n").and_then(|v| v.as_u64()).unwrap_or(0) + 1;
653        c["_n"] = Value::from(n);
654        let eid = format!("review_{}", n);
655        c[&eid] = serde_json::json!({
656            "type": "review_comment",
657            "author": reviewer,
658            "path": path,
659            "line": ic.get("line"),
660            "total_lines": body.matches('\n').count() + 1,
661            "content": body,
662            "replies": ic.get("replies"),
663        });
664        Some(eid)
665    } else {
666        None
667    };
668
669    let mut entry = serde_json::json!({
670        "path": path,
671        "line": ic.get("line"),
672        "preview": preview,
673        "replies": replies,
674    });
675    if let Some(eid) = eid {
676        entry["_element_id"] = Value::String(eid);
677    }
678    entry
679}
680
681// ---------------------------------------------------------------------------
682// ---------------------------------------------------------------------------
683// Thread digest for huge discussions
684// ---------------------------------------------------------------------------
685
686/// For discussions with many comments, replace the flat array with a digest:
687/// first N + maintainer highlights from middle + last M, caching the full middle.
688fn build_thread_digest(result: &mut Value, cache: &mut Option<Value>) -> String {
689    let comments = match result.get_mut("comments").and_then(|v| v.as_array_mut()) {
690        Some(c) => c,
691        None => return String::new(),
692    };
693
694    let total = comments.len();
695    let head = DIGEST_HEAD.min(total);
696    let tail = DIGEST_TAIL.min(total.saturating_sub(head));
697    let middle_start = head;
698    let middle_end = total.saturating_sub(tail);
699
700    if middle_start >= middle_end {
701        return String::new(); // not enough comments for a meaningful digest
702    }
703
704    // Clone the full middle for caching before we mutate
705    let middle_comments: Vec<Value> = comments[middle_start..middle_end].to_vec();
706    let middle_count = middle_comments.len();
707
708    // Cache individual middle comments and extract maintainer highlights
709    let mut maintainer_highlights: Vec<Value> = Vec::new();
710    let mut maintainer_total = 0usize;
711    for (i, c) in middle_comments.iter().enumerate() {
712        let eid = format!("comment_{}", middle_start + i);
713
714        // Cache the full comment for drill-down
715        if let Some(ref mut cache_obj) = cache {
716            if let Some(obj) = cache_obj.as_object_mut() {
717                let mut cached = c.clone();
718                cached["_index"] = Value::Number((middle_start + i).into());
719                obj.insert(
720                    eid.clone(),
721                    serde_json::json!({
722                        "type": "comment",
723                        "content": cached,
724                    }),
725                );
726            }
727        }
728
729        let assoc = c
730            .get("author_association")
731            .and_then(|v| v.as_str())
732            .unwrap_or("");
733        if MAINTAINER_ROLES.contains(&assoc) {
734            maintainer_total += 1;
735            if maintainer_highlights.len() < DIGEST_MAINTAINER_MAX {
736                let mut highlight = c.clone();
737                // Truncate body for the inline preview
738                if let Some(body) = highlight.get("body").and_then(|v| v.as_str()) {
739                    if body.len() > DIGEST_MAINTAINER_CHARS {
740                        let cut = safe_byte_index(body, DIGEST_MAINTAINER_CHARS);
741                        highlight["body"] = Value::String(format!("{}…", &body[..cut]));
742                    }
743                }
744                highlight["_element_id"] = Value::String(eid);
745                highlight["_index"] = Value::Number((middle_start + i).into());
746                maintainer_highlights.push(highlight);
747            }
748        }
749    }
750
751    // Date range for the middle
752    let first_date = middle_comments
753        .first()
754        .and_then(|c| c.get("created_at"))
755        .and_then(|v| v.as_str())
756        .unwrap_or("?");
757    let last_date = middle_comments
758        .last()
759        .and_then(|c| c.get("created_at"))
760        .and_then(|v| v.as_str())
761        .unwrap_or("?");
762
763    // Build the replacement array
764    let head_comments: Vec<Value> = comments[..head].to_vec();
765    let tail_comments: Vec<Value> = comments[total - tail..].to_vec();
766
767    let mut digest: Vec<Value> = Vec::new();
768    digest.extend(head_comments);
769
770    // System note about the gap
771    let gap_msg = format!(
772        "--- {middle_count} comments omitted ({maintainer_total} from maintainers). \
773         Date range: {first_date} to {last_date}. \
774         Use element_id='comments_middle' with grep='pattern' to search. ---"
775    );
776    digest.push(serde_json::json!({
777        "author": "[system]",
778        "body": gap_msg,
779    }));
780
781    // Maintainer highlights
782    if !maintainer_highlights.is_empty() {
783        digest.extend(maintainer_highlights);
784        digest.push(serde_json::json!({
785            "author": "[system]",
786            "body": "--- end maintainer highlights, recent comments follow ---",
787        }));
788    }
789
790    digest.extend(tail_comments);
791
792    // Cache the full middle for drill-down (with _index for grep metadata)
793    if let Some(ref mut cache_obj) = cache {
794        if let Some(obj) = cache_obj.as_object_mut() {
795            let indexed: Vec<Value> = middle_comments
796                .into_iter()
797                .enumerate()
798                .map(|(i, mut c)| {
799                    c["_index"] = Value::Number((middle_start + i).into());
800                    c
801                })
802                .collect();
803            obj.insert(
804                "comments_middle".to_string(),
805                serde_json::json!({
806                    "type": "comment_segment",
807                    "label": "middle",
808                    "comment_count": middle_count,
809                    "content": Value::Array(indexed),
810                }),
811            );
812        }
813    }
814
815    // Replace comments array
816    *comments = digest;
817
818    let comment_count = result
819        .get("comment_count")
820        .and_then(|v| v.as_u64())
821        .unwrap_or(total as u64);
822
823    format!(
824        "thread digest ({} total comments, {} shown inline, {} maintainer highlights)",
825        comment_count,
826        head + tail,
827        maintainer_total.min(DIGEST_MAINTAINER_MAX),
828    )
829}
830
831// ---------------------------------------------------------------------------
832// Main adaptive compaction function
833// ---------------------------------------------------------------------------
834
835/// Compact a discussion JSON, using budget-based adaptive compaction.
836///
837/// Starts with full content. If over budget, iteratively applies compaction
838/// tiers (lowest-value first) until the output fits.
839fn compact_discussion_internal(
840    result: &mut Value,
841    cache: &mut Option<Value>,
842    budget: usize,
843    item_budget: usize,
844) -> Vec<String> {
845    let effective_budget = (budget as f64 * BUDGET_MARGIN) as usize;
846
847    // --- Tier 0: Always-applied cleanup ---
848    let mut compacted_sections: Vec<String> = Vec::new();
849
850    // 0a: Filter bot comments
851    let bot_count = filter_bot_comments(result);
852    if bot_count > 0 {
853        compacted_sections.push(format!("{} bot comments filtered", bot_count));
854    }
855
856    // 0b: Collapse <details> blocks in body
857    collapse_body_code_blocks(result, cache);
858
859    // --- Thread digest for huge discussions (proactive, before budget check) ---
860    let comment_count = result
861        .get("comments")
862        .and_then(|v| v.as_array())
863        .map(|a| a.len())
864        .unwrap_or(0);
865
866    if comment_count > HUGE_THREAD_THRESHOLD {
867        let digest_desc = build_thread_digest(result, cache);
868        if !digest_desc.is_empty() {
869            compacted_sections.push(digest_desc);
870        }
871    }
872
873    // --- Per-item budget enforcement (pre-pass) ---
874    let item_actions = enforce_per_item_limits(result, item_budget, cache);
875    compacted_sections.extend(item_actions);
876
877    // --- Check if under budget ---
878    let mut size = estimate_size(result);
879    if size <= effective_budget {
880        // Everything fits! Cache patches for drill-down even though they're inline.
881        cache_all_patches(result, cache);
882        if !compacted_sections.is_empty() {
883            result["_compaction"] = Value::String(format!(
884                "{}. Use element_id to drill down.",
885                compacted_sections.join("; ")
886            ));
887        }
888        return compacted_sections;
889    }
890
891    // --- Iterative tier compaction ---
892    let mut tier_reached: u8 = 0;
893
894    // Tier 1: Code blocks in comments
895    if size > effective_budget {
896        tier_reached = 1;
897        collapse_comment_code_blocks(result, cache);
898        compacted_sections.push("code blocks collapsed".into());
899        size = estimate_size(result);
900    }
901
902    // Tier 2: Non-maintainer comments truncated
903    if size > effective_budget {
904        tier_reached = 2;
905        truncate_non_maintainer_comments(result, COMMENT_PREVIEW_CHARS, cache);
906        compacted_sections.push("non-maintainer comments truncated".into());
907        size = estimate_size(result);
908    }
909
910    // Tier 3: Large patches (>80 lines) collapsed
911    if size > effective_budget {
912        tier_reached = 3;
913        collapse_patches_over(result, PATCH_INLINE_MAX_LINES, PATCH_INLINE_KEEP, cache);
914        compacted_sections.push("large patches (>80 lines) collapsed".into());
915        size = estimate_size(result);
916    }
917
918    // Tier 4: Maintainer comments truncated
919    if size > effective_budget {
920        tier_reached = 4;
921        truncate_maintainer_comments(result, MAINTAINER_LIMIT, cache);
922        compacted_sections.push("maintainer comments truncated".into());
923        size = estimate_size(result);
924    }
925
926    // Tier 5: Medium patches (>30 lines) collapsed
927    if size > effective_budget {
928        tier_reached = 5;
929        collapse_patches_over(result, TIER5_PATCH_MAX_LINES, TIER5_PATCH_KEEP, cache);
930        compacted_sections.push("medium patches (>30 lines) collapsed".into());
931        size = estimate_size(result);
932    }
933
934    // Tier 6: PR body truncated
935    if size > effective_budget {
936        tier_reached = 6;
937        truncate_body(result, TIER6_BODY_LIMIT, cache);
938        compacted_sections.push("PR body truncated".into());
939        size = estimate_size(result);
940    }
941
942    // Tier 7: Review inline comments compacted
943    if size > effective_budget {
944        tier_reached = 7;
945        compact_reviews(result, REVIEW_PREVIEW_LINES, REVIEW_PREVIEW_CHARS, cache);
946        compacted_sections.push("review comments compacted".into());
947        size = estimate_size(result);
948    }
949
950    // Tier 8: All patches → summary only
951    if size > effective_budget {
952        tier_reached = 8;
953        remove_inline_patches(result, cache);
954        compacted_sections.push("all patches removed (use patch_id to drill down)".into());
955        size = estimate_size(result);
956    }
957
958    // Tier 9: Aggressive truncation
959    if size > effective_budget {
960        tier_reached = 9;
961        truncate_body(result, TIER9_BODY_LIMIT, cache);
962        truncate_non_maintainer_comments(result, TIER9_COMMENT_LIMIT, cache);
963        truncate_maintainer_comments(result, TIER9_COMMENT_LIMIT, cache);
964        compact_reviews(result, 1, TIER9_REVIEW_CHARS, cache);
965        compacted_sections.push("aggressive compaction applied".into());
966        let _ = estimate_size(result);
967    }
968
969    // Cache any patches still inline (they survived compaction)
970    cache_all_patches(result, cache);
971
972    // Add compaction info
973    if tier_reached > 0 {
974        result["_compaction"] = Value::String(format!(
975            "Budget compaction (tier {}). {}. Use element_id to drill down.",
976            tier_reached,
977            compacted_sections.join("; ")
978        ));
979    }
980
981    compacted_sections
982}
983
984/// Cache all inline patches that don't have a patch_id yet (for drill-down support).
985fn cache_all_patches(result: &mut Value, cache: &mut Option<Value>) {
986    if let Some(files) = result.get_mut("files").and_then(|v| v.as_array_mut()) {
987        for f in files.iter_mut() {
988            if let Some(obj) = f.as_object_mut() {
989                if obj.contains_key("patch_id") {
990                    continue;
991                }
992                if let Some(patch_text) = obj
993                    .get("patch")
994                    .and_then(|v| v.as_str())
995                    .map(|s| s.to_string())
996                {
997                    if patch_text.is_empty() {
998                        continue;
999                    }
1000                    let filename = obj
1001                        .get("filename")
1002                        .and_then(|v| v.as_str())
1003                        .unwrap_or("")
1004                        .to_string();
1005                    let additions = obj.get("additions").and_then(|v| v.as_u64()).unwrap_or(0);
1006                    let deletions = obj.get("deletions").and_then(|v| v.as_u64()).unwrap_or(0);
1007                    let total_lines = patch_text.matches('\n').count() + 1;
1008                    let eid = ensure_patch_cached(
1009                        obj,
1010                        &patch_text,
1011                        &filename,
1012                        additions,
1013                        deletions,
1014                        total_lines,
1015                        cache,
1016                    );
1017                    if let Some(eid) = eid {
1018                        obj.insert("patch_id".to_string(), Value::String(eid));
1019                    }
1020                }
1021            }
1022        }
1023    }
1024}
1025
1026// ---------------------------------------------------------------------------
1027// PyO3 wrappers (serialize/deserialize cache at boundary)
1028// ---------------------------------------------------------------------------
1029
1030/// Collapse large fenced code blocks and <details> sections in text.
1031///
1032/// When cache_json is provided (a JSON object string), collapsed elements are stored with IDs.
1033/// Returns (collapsed_text, updated_cache_json).
1034pub fn collapse_code_blocks(text: &str, cache_json: Option<&str>) -> (String, Option<String>) {
1035    let mut cache: Option<Value> = cache_json.and_then(|s| serde_json::from_str(s).ok());
1036    let result = collapse_code_blocks_mut(text, &mut cache);
1037    let cache_out = cache.map(|c| serde_json::to_string(&c).unwrap_or_default());
1038    (result, cache_out)
1039}
1040
1041/// Collapse code blocks then truncate if over limit.
1042/// Returns (text, was_truncated, cache_json).
1043pub fn compact_text(
1044    text: &str,
1045    limit: usize,
1046    cache_json: Option<&str>,
1047) -> (String, bool, Option<String>) {
1048    let mut cache: Option<Value> = cache_json.and_then(|s| serde_json::from_str(s).ok());
1049    let (result, truncated) = compact_text_mut(text, limit, &mut cache);
1050    let cache_out = cache.map(|c| serde_json::to_string(&c).unwrap_or_default());
1051    (result, truncated, cache_out)
1052}
1053
1054/// Compact a discussion JSON string using budget-based adaptive
1055/// compaction. Pure-Rust callable in both build modes.
1056///
1057/// Returns `Ok((compacted_json, cache_json))` on success or `Err(msg)`
1058/// for JSON parse errors. `budget`/`item_budget` control output size
1059/// limits (defaults: 60KB / 15KB).
1060pub fn compact_discussion(
1061    discussion_json: &str,
1062    cache_json: Option<&str>,
1063    budget: Option<usize>,
1064    item_budget: Option<usize>,
1065) -> Result<(String, Option<String>), String> {
1066    let mut result: Value =
1067        serde_json::from_str(discussion_json).map_err(|e| format!("Invalid JSON: {}", e))?;
1068
1069    let mut cache: Option<Value> = cache_json.and_then(|s| serde_json::from_str(s).ok());
1070
1071    let budget = budget.unwrap_or(DEFAULT_BUDGET);
1072    let item_budget = item_budget.unwrap_or(DEFAULT_ITEM_BUDGET);
1073
1074    compact_discussion_internal(&mut result, &mut cache, budget, item_budget);
1075
1076    let out = serde_json::to_string_pretty(&result).unwrap_or_default();
1077    let cache_out = cache.map(|c| serde_json::to_string(&c).unwrap_or_default());
1078    Ok((out, cache_out))
1079}