Skip to main content

devboy_format_pipeline/
page_index.rs

1//! Page index generation for large results.
2//!
3//! When budget trimming drops items, generates a structured index
4//! describing what's on each page so the LLM can request specific pages.
5
6use devboy_core::{Comment, Discussion, FileDiff, Issue, MergeRequest};
7use std::collections::BTreeMap;
8
/// Describes one page (chunk) of a paginated result set.
#[derive(Debug, Clone)]
pub struct PageDescriptor {
    /// 1-based page number.
    pub page: usize,
    /// Human-readable summary of what this page contains.
    pub summary: String,
    /// Number of items on this page.
    pub item_count: usize,
    /// 0-based index of this page's first item within the full result set.
    pub offset: usize,
}
19
20/// Full page index for a result set.
21///
22/// Note: there is no "current page" concept because budget trimming may
23/// select non-contiguous items by priority. The shown items are selected
24/// by the trim strategy, not by sequential page boundaries.
25#[derive(Debug, Clone)]
26pub struct PageIndex {
27    /// Total items across all pages
28    pub total_items: usize,
29    /// Items shown (selected by budget trimming, may span multiple pages)
30    pub shown_items: usize,
31    pub total_pages: usize,
32    /// Page descriptors
33    pub pages: Vec<PageDescriptor>,
34    /// Data type (e.g., "issues", "diffs", "discussions")
35    pub data_type: String,
36}
37
38impl PageIndex {
39    /// Render chunk index as a structured block.
40    ///
41    /// The output is designed to be read by an LLM agent that can decide
42    /// which chunks to fetch based on the descriptions.
43    pub fn to_toon(&self) -> String {
44        let mut lines = Vec::new();
45        lines.push(format!(
46            "[chunks] {}/{} {} in {} chunks:",
47            self.shown_items, self.total_items, self.data_type, self.total_pages
48        ));
49        for p in &self.pages {
50            let marker = if p.page == 1 {
51                " << returned in this response"
52            } else {
53                ""
54            };
55            lines.push(format!(
56                "  chunk {} (offset={}, limit={}): {}{}",
57                p.page, p.offset, p.item_count, p.summary, marker
58            ));
59        }
60        lines.push(
61            "[/chunks] Use `chunk: N` parameter to fetch a specific chunk. You may not need all chunks."
62                .to_string(),
63        );
64        lines.join("\n")
65    }
66}
67
/// Default page size for chunking.
const DEFAULT_PAGE_SIZE: usize = 20;

/// Compute the page size from the number of items that fit in budget.
///
/// Uses `included_items` (items that fit in one budget window) as the page
/// size. When `included_items` is 0, falls back to `DEFAULT_PAGE_SIZE`,
/// capped at `total_items`.
///
/// The result is always at least 1: callers divide by it
/// (`total.div_ceil(page_size)`), so a zero page size — previously returned
/// when both arguments were 0 — would panic with a division by zero.
fn compute_page_size(total_items: usize, included_items: usize) -> usize {
    if included_items > 0 {
        included_items
    } else {
        // min(total_items, DEFAULT_PAGE_SIZE), but never below 1.
        total_items.clamp(1, DEFAULT_PAGE_SIZE)
    }
}
82
83// =============================================================================
84// Type-specific page index builders
85// =============================================================================
86
87pub fn build_issues_index(issues: &[Issue], included_count: usize) -> PageIndex {
88    let total = issues.len();
89    let page_size = compute_page_size(total, included_count);
90    let total_pages = total.div_ceil(page_size);
91
92    let pages: Vec<PageDescriptor> = (0..total_pages)
93        .map(|page_idx| {
94            let offset = page_idx * page_size;
95            let end = (offset + page_size).min(total);
96            let page_issues = &issues[offset..end];
97            let item_count = page_issues.len();
98
99            // Summarize: count by state
100            let mut states: BTreeMap<&str, usize> = BTreeMap::new();
101            for issue in page_issues {
102                *states.entry(issue.state.as_str()).or_default() += 1;
103            }
104            let state_parts: Vec<String> =
105                states.iter().map(|(s, c)| format!("{} {}", c, s)).collect();
106            let summary = format!(
107                "issues #{}-{} ({})",
108                offset + 1,
109                end,
110                state_parts.join(", ")
111            );
112
113            PageDescriptor {
114                page: page_idx + 1,
115                summary,
116                item_count,
117                offset,
118            }
119        })
120        .collect();
121
122    PageIndex {
123        total_items: total,
124        shown_items: included_count,
125        total_pages,
126        pages,
127        data_type: "issues".to_string(),
128    }
129}
130
131pub fn build_merge_requests_index(mrs: &[MergeRequest], included_count: usize) -> PageIndex {
132    let total = mrs.len();
133    let page_size = compute_page_size(total, included_count);
134    let total_pages = total.div_ceil(page_size);
135
136    let pages: Vec<PageDescriptor> = (0..total_pages)
137        .map(|page_idx| {
138            let offset = page_idx * page_size;
139            let end = (offset + page_size).min(total);
140            let page_mrs = &mrs[offset..end];
141
142            let mut states: BTreeMap<&str, usize> = BTreeMap::new();
143            for mr in page_mrs {
144                *states.entry(mr.state.as_str()).or_default() += 1;
145            }
146            let state_parts: Vec<String> =
147                states.iter().map(|(s, c)| format!("{} {}", c, s)).collect();
148            let summary = format!("MRs #{}-{} ({})", offset + 1, end, state_parts.join(", "));
149
150            PageDescriptor {
151                page: page_idx + 1,
152                summary,
153                item_count: page_mrs.len(),
154                offset,
155            }
156        })
157        .collect();
158
159    PageIndex {
160        total_items: total,
161        shown_items: included_count,
162        total_pages,
163        pages,
164        data_type: "merge_requests".to_string(),
165    }
166}
167
168/// Build page index for diffs — grouped by directory.
169pub fn build_diffs_index(diffs: &[FileDiff], included_count: usize) -> PageIndex {
170    let total = diffs.len();
171    let page_size = compute_page_size(total, included_count);
172    let total_pages = total.div_ceil(page_size);
173
174    let pages: Vec<PageDescriptor> = (0..total_pages)
175        .map(|page_idx| {
176            let offset = page_idx * page_size;
177            let end = (offset + page_size).min(total);
178            let page_diffs = &diffs[offset..end];
179
180            // Group by top-level directory for summary
181            let mut dirs: BTreeMap<String, usize> = BTreeMap::new();
182            let mut total_additions: u32 = 0;
183            let mut total_deletions: u32 = 0;
184            for d in page_diffs {
185                let dir = extract_top_dir(&d.file_path);
186                *dirs.entry(dir).or_default() += 1;
187                total_additions += d.additions.unwrap_or(0);
188                total_deletions += d.deletions.unwrap_or(0);
189            }
190
191            let dir_parts: Vec<String> = dirs
192                .iter()
193                .map(|(d, c)| {
194                    if *c == 1 {
195                        format!("{d}/*")
196                    } else {
197                        format!("{d}/* ({c} files)")
198                    }
199                })
200                .collect();
201
202            let summary = format!(
203                "{} — +{}/-{}",
204                dir_parts.join(", "),
205                total_additions,
206                total_deletions
207            );
208
209            PageDescriptor {
210                page: page_idx + 1,
211                summary,
212                item_count: page_diffs.len(),
213                offset,
214            }
215        })
216        .collect();
217
218    PageIndex {
219        total_items: total,
220        shown_items: included_count,
221        total_pages,
222        pages,
223        data_type: "diffs".to_string(),
224    }
225}
226
227/// Build page index for discussions — grouped by resolved status.
228pub fn build_discussions_index(discussions: &[Discussion], included_count: usize) -> PageIndex {
229    let total = discussions.len();
230    let page_size = compute_page_size(total, included_count);
231    let total_pages = total.div_ceil(page_size);
232
233    let pages: Vec<PageDescriptor> = (0..total_pages)
234        .map(|page_idx| {
235            let offset = page_idx * page_size;
236            let end = (offset + page_size).min(total);
237            let page_disc = &discussions[offset..end];
238
239            let resolved = page_disc.iter().filter(|d| d.resolved).count();
240            let unresolved = page_disc.len() - resolved;
241
242            let summary = format!(
243                "{} discussions ({} unresolved, {} resolved)",
244                page_disc.len(),
245                unresolved,
246                resolved
247            );
248
249            PageDescriptor {
250                page: page_idx + 1,
251                summary,
252                item_count: page_disc.len(),
253                offset,
254            }
255        })
256        .collect();
257
258    PageIndex {
259        total_items: total,
260        shown_items: included_count,
261        total_pages,
262        pages,
263        data_type: "discussions".to_string(),
264    }
265}
266
267/// Build page index for comments — chronological.
268pub fn build_comments_index(comments: &[Comment], included_count: usize) -> PageIndex {
269    let total = comments.len();
270    let page_size = compute_page_size(total, included_count);
271    let total_pages = total.div_ceil(page_size);
272
273    let pages: Vec<PageDescriptor> = (0..total_pages)
274        .map(|page_idx| {
275            let offset = page_idx * page_size;
276            let end = (offset + page_size).min(total);
277            let page_comments = &comments[offset..end];
278
279            let summary = format!("comments {}-{}", offset + 1, end);
280
281            PageDescriptor {
282                page: page_idx + 1,
283                summary,
284                item_count: page_comments.len(),
285                offset,
286            }
287        })
288        .collect();
289
290    PageIndex {
291        total_items: total,
292        shown_items: included_count,
293        total_pages,
294        pages,
295        data_type: "comments".to_string(),
296    }
297}
298
299// =============================================================================
300// Helpers
301// =============================================================================
302
/// Extract a grouping directory from a `/`-separated file path.
///
/// Returns up to the first three directory segments, never including the
/// final segment (the file name):
/// "src/app/modules/mcp/tools/foo.ts" → "src/app/modules",
/// "src/main.rs" → "src", and a bare file name such as "README.md" → ".".
fn extract_top_dir(path: &str) -> String {
    let segments: Vec<&str> = path.split('/').collect();
    match segments.as_slice() {
        // No directory component at all.
        [] | [_] => ".".to_string(),
        // Exactly one directory followed by the file name.
        [dir, _] => (*dir).to_string(),
        // Deeper paths: keep at most three levels, always dropping the last segment.
        _ => {
            let depth = (segments.len() - 1).min(3);
            segments[..depth].join("/")
        }
    }
}
320
#[cfg(test)]
mod tests {
    use super::*;

    /// `extract_top_dir` keeps at most three leading directories and maps a
    /// bare file name to ".".
    #[test]
    fn test_extract_top_dir() {
        let cases = [
            ("src/app/modules/mcp/tools/foo.ts", "src/app/modules"),
            ("README.md", "."),
            ("src/main.rs", "src"),
            ("a/b/c/d/e.rs", "a/b/c"),
        ];
        for &(path, expected) in &cases {
            assert_eq!(extract_top_dir(path), expected);
        }
    }

    /// `to_toon` emits the header, one line per chunk, and the footer, and
    /// marks only chunk 1 as returned.
    #[test]
    fn test_page_index_toon_output() {
        let descriptor = |page: usize, offset: usize, summary: &str| PageDescriptor {
            page,
            summary: summary.to_string(),
            item_count: 15,
            offset,
        };
        let index = PageIndex {
            total_items: 52,
            shown_items: 15,
            total_pages: 4,
            pages: vec![
                descriptor(1, 0, "src/app/modules/* (8 files) — +120/-45"),
                descriptor(2, 15, "apps/dev-boy-e2e/* (17 files) — +340/-12"),
            ],
            data_type: "diffs".to_string(),
        };

        let toon = index.to_toon();
        let expected_fragments = [
            "[chunks] 15/52 diffs in 4 chunks:",
            "chunk 1 (offset=0, limit=15):",
            "<< returned in this response",
            "chunk 2 (offset=15, limit=15):",
            "[/chunks]",
            "You may not need all chunks",
        ];
        for &fragment in &expected_fragments {
            assert!(toon.contains(fragment));
        }

        // Only chunk 1 is marked as returned
        let marked = toon
            .lines()
            .filter(|line| line.contains("returned in this response"))
            .count();
        assert_eq!(marked, 1, "Only chunk 1 should be marked as returned");
    }

    /// Ten diffs with five included per budget window split into two pages
    /// of five.
    #[test]
    fn test_build_diffs_index() {
        let mut diffs: Vec<FileDiff> = Vec::with_capacity(10);
        for i in 0..10 {
            diffs.push(FileDiff {
                file_path: format!("src/app/file_{}.ts", i),
                diff: format!("diff content {}", i),
                additions: Some(10),
                deletions: Some(5),
                ..Default::default()
            });
        }

        let index = build_diffs_index(&diffs, 5);
        assert_eq!(index.total_items, 10);
        assert_eq!(index.total_pages, 2);

        let first = &index.pages[0];
        assert_eq!(first.item_count, 5);
        assert_eq!(first.offset, 0);
        assert_eq!(index.pages[1].offset, 5);
    }
}