Skip to main content

devboy_format_pipeline/
tree.rs

1//! Trim tree (TrimTree) for budget trimming.
2//!
3//! The JSON response is modeled as a rooted tree T = (V, E) where each node has:
4//! - weight -- cost in tokens (serialization without children)
5//! - value -- information value (depends on the strategy)
6//! - included -- whether the node is included in the final output
7//!
8//! Builders convert typed data into TrimNode trees with precomputed weights.
9
10use devboy_core::{Comment, Discussion, FileDiff, Issue, MergeRequest};
11
12use crate::token_counter::estimate_tokens;
13use crate::toon;
14
15/// Trim tree node.
16#[derive(Debug, Clone)]
17pub struct TrimNode {
18    /// Unique node identifier within the tree
19    pub id: usize,
20    pub kind: NodeKind,
21    /// Weight in tokens (serialization cost without children)
22    pub weight: usize,
23    /// Information value (0.0-1.0), assigned by the strategy
24    pub value: f64,
25    /// Child nodes
26    pub children: Vec<TrimNode>,
27    /// Whether the node is included in the final output
28    pub included: bool,
29}
30
31/// Node type in the trim tree.
32#[derive(Debug, Clone, PartialEq)]
33pub enum NodeKind {
34    /// Root node (collection)
35    Root,
36    /// Collection element (issue, MR, comment, diff)
37    Item {
38        /// Index of the element in the original array
39        index: usize,
40    },
41    /// Field within an element (description, body, diff content)
42    Field { name: String },
43    /// Text block (description body, diff content, log content)
44    Text,
45}
46
47impl TrimNode {
48    /// Create a new node.
49    pub fn new(id: usize, kind: NodeKind, weight: usize) -> Self {
50        Self {
51            id,
52            kind,
53            weight,
54            value: 1.0, // default value, overridden by the strategy
55            children: Vec::new(),
56            included: true,
57        }
58    }
59
60    /// Total number of nodes in the subtree (including self).
61    pub fn count_nodes(&self) -> usize {
62        1 + self.children.iter().map(|c| c.count_nodes()).sum::<usize>()
63    }
64
65    /// Total weight of the subtree (sum of weights of all included nodes).
66    pub fn total_weight(&self) -> usize {
67        if !self.included {
68            return 0;
69        }
70        self.weight
71            + self
72                .children
73                .iter()
74                .map(|c| c.total_weight())
75                .sum::<usize>()
76    }
77
78    /// Total value of the subtree (sum of value * weight for included nodes).
79    pub fn total_value(&self) -> f64 {
80        if !self.included {
81            return 0.0;
82        }
83        self.value * self.weight as f64 + self.children.iter().map(|c| c.total_value()).sum::<f64>()
84    }
85
86    /// Information density: value / weight.
87    pub fn density(&self) -> f64 {
88        if self.weight == 0 {
89            return 0.0;
90        }
91        self.value / self.weight as f64
92    }
93
94    /// Count of included Item nodes.
95    pub fn included_items_count(&self) -> usize {
96        let self_count = if self.included && matches!(self.kind, NodeKind::Item { .. }) {
97            1
98        } else {
99            0
100        };
101        self_count
102            + self
103                .children
104                .iter()
105                .map(|c| c.included_items_count())
106                .sum::<usize>()
107    }
108
109    /// Collect indices of included Item nodes.
110    pub fn included_item_indices(&self) -> Vec<usize> {
111        let mut indices = Vec::new();
112        self.collect_included_indices(&mut indices);
113        indices
114    }
115
116    fn collect_included_indices(&self, indices: &mut Vec<usize>) {
117        if self.included
118            && let NodeKind::Item { index } = &self.kind
119        {
120            indices.push(*index);
121        }
122        if self.included {
123            for child in &self.children {
124                child.collect_included_indices(indices);
125            }
126        }
127    }
128
129    /// Collect indices of excluded Item nodes (overflow).
130    pub fn excluded_item_indices(&self) -> Vec<usize> {
131        let mut indices = Vec::new();
132        self.collect_excluded_indices(&mut indices);
133        indices
134    }
135
136    fn collect_excluded_indices(&self, indices: &mut Vec<usize>) {
137        if !self.included
138            && let NodeKind::Item { index } = &self.kind
139        {
140            indices.push(*index);
141            // All descendants of an excluded node are also excluded
142        } else if self.included {
143            for child in &self.children {
144                child.collect_excluded_indices(indices);
145            }
146        }
147    }
148}
149
150// ============================================================================
151// ID Generator
152// ============================================================================
153
154/// Unique ID generator for tree nodes.
155struct IdGen(usize);
156
157impl IdGen {
158    fn new() -> Self {
159        Self(0)
160    }
161
162    fn next(&mut self) -> usize {
163        let id = self.0;
164        self.0 += 1;
165        id
166    }
167}
168
169// ============================================================================
170// Builders
171// ============================================================================
172
173/// Build a tree from a list of issues.
174///
175/// Structure: Root -> [Item(0), Item(1), ...].
176/// Each Item may contain a Field("description") if the description is long.
177pub fn build_issues_tree(issues: &[Issue]) -> TrimNode {
178    let mut id_gen = IdGen::new();
179    let mut root = TrimNode::new(id_gen.next(), NodeKind::Root, 0);
180
181    for (i, issue) in issues.iter().enumerate() {
182        let item_weight = estimate_item_tokens(issue);
183        let mut item = TrimNode::new(id_gen.next(), NodeKind::Item { index: i }, item_weight);
184
185        // If description > 100 chars -- extract into a separate Field for granular trimming
186        if let Some(desc) = &issue.description
187            && desc.len() > 100
188        {
189            let desc_weight = estimate_tokens(desc);
190            item.weight = item.weight.saturating_sub(desc_weight);
191            let field = TrimNode::new(
192                id_gen.next(),
193                NodeKind::Field {
194                    name: "description".into(),
195                },
196                desc_weight,
197            );
198            item.children.push(field);
199        }
200
201        root.children.push(item);
202    }
203
204    root
205}
206
207pub fn build_merge_requests_tree(mrs: &[MergeRequest]) -> TrimNode {
208    let mut id_gen = IdGen::new();
209    let mut root = TrimNode::new(id_gen.next(), NodeKind::Root, 0);
210
211    for (i, mr) in mrs.iter().enumerate() {
212        let item_weight = estimate_item_tokens(mr);
213        let mut item = TrimNode::new(id_gen.next(), NodeKind::Item { index: i }, item_weight);
214
215        if let Some(desc) = &mr.description
216            && desc.len() > 100
217        {
218            let desc_weight = estimate_tokens(desc);
219            item.weight = item.weight.saturating_sub(desc_weight);
220            let field = TrimNode::new(
221                id_gen.next(),
222                NodeKind::Field {
223                    name: "description".into(),
224                },
225                desc_weight,
226            );
227            item.children.push(field);
228        }
229
230        root.children.push(item);
231    }
232
233    root
234}
235
236/// Build a tree from a list of file diffs.
237///
238/// Diff content is extracted into Field("diff") for granular trimming by file type.
239pub fn build_diffs_tree(diffs: &[FileDiff]) -> TrimNode {
240    let mut id_gen = IdGen::new();
241    let mut root = TrimNode::new(id_gen.next(), NodeKind::Root, 0);
242
243    for (i, diff) in diffs.iter().enumerate() {
244        let item_weight = estimate_item_tokens(diff);
245        let mut item = TrimNode::new(id_gen.next(), NodeKind::Item { index: i }, item_weight);
246
247        // Diff content is always extracted into a separate node
248        if !diff.diff.is_empty() {
249            let diff_weight = estimate_tokens(&diff.diff);
250            item.weight = item.weight.saturating_sub(diff_weight);
251            let field = TrimNode::new(
252                id_gen.next(),
253                NodeKind::Field {
254                    name: "diff".into(),
255                },
256                diff_weight,
257            );
258            item.children.push(field);
259        }
260
261        root.children.push(item);
262    }
263
264    root
265}
266
267pub fn build_comments_tree(comments: &[Comment]) -> TrimNode {
268    let mut id_gen = IdGen::new();
269    let mut root = TrimNode::new(id_gen.next(), NodeKind::Root, 0);
270
271    for (i, comment) in comments.iter().enumerate() {
272        let item_weight = estimate_item_tokens(comment);
273        let mut item = TrimNode::new(id_gen.next(), NodeKind::Item { index: i }, item_weight);
274
275        // Body > 200 chars -- extract into a separate node
276        if comment.body.len() > 200 {
277            let body_weight = estimate_tokens(&comment.body);
278            item.weight = item.weight.saturating_sub(body_weight);
279            let field = TrimNode::new(
280                id_gen.next(),
281                NodeKind::Field {
282                    name: "body".into(),
283                },
284                body_weight,
285            );
286            item.children.push(field);
287        }
288
289        root.children.push(item);
290    }
291
292    root
293}
294
295/// Build a tree from a list of discussions.
296///
297/// Two-level structure: Root -> Discussion(Item) -> Comment(Item).
298pub fn build_discussions_tree(discussions: &[Discussion]) -> TrimNode {
299    let mut id_gen = IdGen::new();
300    let mut root = TrimNode::new(id_gen.next(), NodeKind::Root, 0);
301
302    for (i, discussion) in discussions.iter().enumerate() {
303        // Discussion weight = metadata (id, resolved, position) without comments
304        let metadata_weight = estimate_tokens(&format!(
305            "id:{} resolved:{}",
306            discussion.id, discussion.resolved
307        ));
308        let mut disc_node =
309            TrimNode::new(id_gen.next(), NodeKind::Item { index: i }, metadata_weight);
310
311        // Each comment is a separate child
312        for (j, comment) in discussion.comments.iter().enumerate() {
313            let comment_weight = estimate_item_tokens(comment);
314            let comment_node =
315                TrimNode::new(id_gen.next(), NodeKind::Item { index: j }, comment_weight);
316            disc_node.children.push(comment_node);
317        }
318
319        root.children.push(disc_node);
320    }
321
322    root
323}
324
325/// Estimate tokens for a single element via TOON encode.
326fn estimate_item_tokens<T: serde::Serialize>(item: &T) -> usize {
327    match toon::encode_value(item) {
328        Ok(encoded) => estimate_tokens(&encoded),
329        Err(_) => {
330            // Fallback: estimate via JSON
331            match serde_json::to_string(item) {
332                Ok(json) => estimate_tokens(&json),
333                Err(_) => 50, // minimum estimate
334            }
335        }
336    }
337}
338
339#[cfg(test)]
340mod tests {
341    use super::*;
342    use devboy_core::User;
343
344    fn sample_issues(n: usize) -> Vec<Issue> {
345        (0..n)
346            .map(|i| Issue {
347                key: format!("gh#{}", i + 1),
348                title: format!("Issue {}", i + 1),
349                description: if i % 2 == 0 {
350                    Some("A".repeat(200)) // long description
351                } else {
352                    Some("Short desc".into())
353                },
354                state: "open".into(),
355                source: "github".into(),
356                priority: None,
357                labels: vec!["bug".into()],
358                author: Some(User {
359                    id: format!("{}", i),
360                    username: format!("user{}", i),
361                    name: None,
362                    email: None,
363                    avatar_url: None,
364                }),
365                assignees: vec![],
366                url: Some(format!("https://github.com/test/repo/issues/{}", i + 1)),
367                created_at: Some("2024-01-01T00:00:00Z".into()),
368                updated_at: Some("2024-01-02T00:00:00Z".into()),
369                attachments_count: None,
370                parent: None,
371                subtasks: vec![],
372                custom_fields: std::collections::HashMap::new(),
373            })
374            .collect()
375    }
376
377    fn sample_diffs(n: usize) -> Vec<FileDiff> {
378        (0..n)
379            .map(|i| FileDiff {
380                file_path: format!("src/file_{}.rs", i),
381                old_path: None,
382                new_file: false,
383                deleted_file: false,
384                renamed_file: false,
385                diff: format!("+added line {}\n-removed line {}", i, i),
386                additions: Some(1),
387                deletions: Some(1),
388            })
389            .collect()
390    }
391
392    fn sample_comments(n: usize) -> Vec<Comment> {
393        (0..n)
394            .map(|i| Comment {
395                id: format!("{}", i),
396                body: format!("Comment body {}", i),
397                author: None,
398                created_at: Some("2024-01-01T00:00:00Z".into()),
399                updated_at: None,
400                position: None,
401            })
402            .collect()
403    }
404
405    fn sample_discussions(n: usize) -> Vec<Discussion> {
406        (0..n)
407            .map(|i| Discussion {
408                id: format!("{}", i),
409                resolved: i % 2 == 0,
410                resolved_by: None,
411                comments: vec![
412                    Comment {
413                        id: format!("c{}a", i),
414                        body: format!("First comment in discussion {}", i),
415                        author: None,
416                        created_at: None,
417                        updated_at: None,
418                        position: None,
419                    },
420                    Comment {
421                        id: format!("c{}b", i),
422                        body: format!("Reply in discussion {}", i),
423                        author: None,
424                        created_at: None,
425                        updated_at: None,
426                        position: None,
427                    },
428                ],
429                position: None,
430            })
431            .collect()
432    }
433
434    // --- Structure tests ---
435
436    #[test]
437    fn test_build_issues_tree_structure() {
438        let issues = sample_issues(5);
439        let tree = build_issues_tree(&issues);
440
441        assert_eq!(tree.kind, NodeKind::Root);
442        assert_eq!(tree.children.len(), 5);
443        assert!(tree.weight == 0); // root has no own weight
444
445        // Each child is an Item
446        for (i, child) in tree.children.iter().enumerate() {
447            assert_eq!(child.kind, NodeKind::Item { index: i });
448            assert!(child.weight > 0);
449            assert!(child.included);
450        }
451    }
452
453    #[test]
454    fn test_build_issues_tree_with_description_fields() {
455        let issues = sample_issues(4);
456        let tree = build_issues_tree(&issues);
457
458        // Issues 0, 2 have long description → Field child
459        assert!(
460            !tree.children[0].children.is_empty(),
461            "Issue 0 should have description field"
462        );
463        assert!(
464            tree.children[1].children.is_empty(),
465            "Issue 1 should not have description field (short)"
466        );
467        assert!(!tree.children[2].children.is_empty());
468        assert!(tree.children[3].children.is_empty());
469    }
470
471    #[test]
472    fn test_build_diffs_tree_structure() {
473        let diffs = sample_diffs(3);
474        let tree = build_diffs_tree(&diffs);
475
476        assert_eq!(tree.children.len(), 3);
477        // Each diff has diff content as a Field child
478        for child in &tree.children {
479            assert_eq!(child.children.len(), 1);
480            assert_eq!(
481                child.children[0].kind,
482                NodeKind::Field {
483                    name: "diff".into()
484                }
485            );
486        }
487    }
488
489    #[test]
490    fn test_build_comments_tree_structure() {
491        let comments = sample_comments(5);
492        let tree = build_comments_tree(&comments);
493
494        assert_eq!(tree.children.len(), 5);
495        // Short comments — no child fields
496        for child in &tree.children {
497            assert!(child.children.is_empty());
498        }
499    }
500
501    #[test]
502    fn test_build_discussions_tree_structure() {
503        let discussions = sample_discussions(3);
504        let tree = build_discussions_tree(&discussions);
505
506        assert_eq!(tree.children.len(), 3);
507        // Each discussion has 2 comment children
508        for disc in &tree.children {
509            assert_eq!(disc.children.len(), 2);
510        }
511    }
512
513    #[test]
514    fn test_build_merge_requests_tree() {
515        let mrs: Vec<MergeRequest> = (0..3)
516            .map(|i| MergeRequest {
517                key: format!("pr#{}", i),
518                title: format!("PR {}", i),
519                description: Some("A".repeat(200)),
520                state: "open".into(),
521                source: "github".into(),
522                source_branch: "feat".into(),
523                target_branch: "main".into(),
524                author: None,
525                assignees: vec![],
526                reviewers: vec![],
527                labels: vec![],
528                draft: false,
529                url: None,
530                created_at: None,
531                updated_at: None,
532            })
533            .collect();
534
535        let tree = build_merge_requests_tree(&mrs);
536        assert_eq!(tree.children.len(), 3);
537        // Long description → Field child
538        for child in &tree.children {
539            assert!(!child.children.is_empty());
540        }
541    }
542
543    // --- Counting tests ---
544
545    #[test]
546    fn test_count_nodes() {
547        let issues = sample_issues(5);
548        let tree = build_issues_tree(&issues);
549
550        // Root + 5 items + some description fields
551        assert!(tree.count_nodes() >= 6);
552    }
553
554    #[test]
555    fn test_total_weight() {
556        let issues = sample_issues(3);
557        let tree = build_issues_tree(&issues);
558
559        let total = tree.total_weight();
560        assert!(total > 0);
561
562        // Weight should be sum of all children
563        let manual_sum: usize = tree.children.iter().map(|c| c.total_weight()).sum();
564        assert_eq!(total, manual_sum); // root weight is 0
565    }
566
567    #[test]
568    fn test_included_items_count() {
569        let issues = sample_issues(5);
570        let mut tree = build_issues_tree(&issues);
571
572        assert_eq!(tree.included_items_count(), 5);
573
574        // Exclude 2 items
575        tree.children[1].included = false;
576        tree.children[3].included = false;
577
578        assert_eq!(tree.included_items_count(), 3);
579    }
580
581    #[test]
582    fn test_included_excluded_indices() {
583        let issues = sample_issues(5);
584        let mut tree = build_issues_tree(&issues);
585
586        tree.children[1].included = false;
587        tree.children[3].included = false;
588
589        let included = tree.included_item_indices();
590        let excluded = tree.excluded_item_indices();
591
592        assert_eq!(included, vec![0, 2, 4]);
593        assert_eq!(excluded, vec![1, 3]);
594    }
595
596    // --- Weight correctness ---
597
598    #[test]
599    fn test_weights_are_positive() {
600        let issues = sample_issues(10);
601        let tree = build_issues_tree(&issues);
602
603        for child in &tree.children {
604            assert!(
605                child.weight > 0 || !child.children.is_empty(),
606                "Item should have weight or children with weight"
607            );
608            assert!(child.total_weight() > 0);
609        }
610    }
611
612    #[test]
613    fn test_total_weight_decreases_when_excluded() {
614        let issues = sample_issues(5);
615        let mut tree = build_issues_tree(&issues);
616
617        let full_weight = tree.total_weight();
618        tree.children[0].included = false;
619        let reduced_weight = tree.total_weight();
620
621        assert!(reduced_weight < full_weight);
622    }
623
624    // --- Density ---
625
626    #[test]
627    fn test_density_calculation() {
628        let mut node = TrimNode::new(0, NodeKind::Item { index: 0 }, 100);
629        node.value = 0.5;
630        assert!((node.density() - 0.005).abs() < 0.0001);
631
632        let zero_node = TrimNode::new(1, NodeKind::Item { index: 1 }, 0);
633        assert_eq!(zero_node.density(), 0.0);
634    }
635}