Skip to main content

devboy_format_pipeline/
tree.rs

1//! Trim tree (TrimTree) for budget trimming.
2//!
3//! The JSON response is modeled as a rooted tree T = (V, E) where each node has:
4//! - weight -- cost in tokens (serialization without children)
5//! - value -- information value (depends on the strategy)
6//! - included -- whether the node is included in the final output
7//!
8//! Builders convert typed data into TrimNode trees with precomputed weights.
9
10use devboy_core::{Comment, Discussion, FileDiff, Issue, MergeRequest};
11
12use crate::token_counter::estimate_tokens;
13use crate::toon;
14
15/// Trim tree node.
16#[derive(Debug, Clone)]
17pub struct TrimNode {
18    /// Unique node identifier within the tree
19    pub id: usize,
20    pub kind: NodeKind,
21    /// Weight in tokens (serialization cost without children)
22    pub weight: usize,
23    /// Information value (0.0-1.0), assigned by the strategy
24    pub value: f64,
25    /// Child nodes
26    pub children: Vec<TrimNode>,
27    /// Whether the node is included in the final output
28    pub included: bool,
29}
30
31/// Node type in the trim tree.
32#[derive(Debug, Clone, PartialEq)]
33pub enum NodeKind {
34    /// Root node (collection)
35    Root,
36    /// Collection element (issue, MR, comment, diff)
37    Item {
38        /// Index of the element in the original array
39        index: usize,
40    },
41    /// Field within an element (description, body, diff content)
42    Field { name: String },
43    /// Text block (description body, diff content, log content)
44    Text,
45}
46
47impl TrimNode {
48    /// Create a new node.
49    pub fn new(id: usize, kind: NodeKind, weight: usize) -> Self {
50        Self {
51            id,
52            kind,
53            weight,
54            value: 1.0, // default value, overridden by the strategy
55            children: Vec::new(),
56            included: true,
57        }
58    }
59
60    /// Total number of nodes in the subtree (including self).
61    pub fn count_nodes(&self) -> usize {
62        1 + self.children.iter().map(|c| c.count_nodes()).sum::<usize>()
63    }
64
65    /// Total weight of the subtree (sum of weights of all included nodes).
66    pub fn total_weight(&self) -> usize {
67        if !self.included {
68            return 0;
69        }
70        self.weight
71            + self
72                .children
73                .iter()
74                .map(|c| c.total_weight())
75                .sum::<usize>()
76    }
77
78    /// Total value of the subtree (sum of value * weight for included nodes).
79    pub fn total_value(&self) -> f64 {
80        if !self.included {
81            return 0.0;
82        }
83        self.value * self.weight as f64 + self.children.iter().map(|c| c.total_value()).sum::<f64>()
84    }
85
86    /// Information density: value / weight.
87    pub fn density(&self) -> f64 {
88        if self.weight == 0 {
89            return 0.0;
90        }
91        self.value / self.weight as f64
92    }
93
94    /// Count of included Item nodes.
95    pub fn included_items_count(&self) -> usize {
96        let self_count = if self.included && matches!(self.kind, NodeKind::Item { .. }) {
97            1
98        } else {
99            0
100        };
101        self_count
102            + self
103                .children
104                .iter()
105                .map(|c| c.included_items_count())
106                .sum::<usize>()
107    }
108
109    /// Collect indices of included Item nodes.
110    pub fn included_item_indices(&self) -> Vec<usize> {
111        let mut indices = Vec::new();
112        self.collect_included_indices(&mut indices);
113        indices
114    }
115
116    fn collect_included_indices(&self, indices: &mut Vec<usize>) {
117        if self.included
118            && let NodeKind::Item { index } = &self.kind
119        {
120            indices.push(*index);
121        }
122        if self.included {
123            for child in &self.children {
124                child.collect_included_indices(indices);
125            }
126        }
127    }
128
129    /// Collect indices of excluded Item nodes (overflow).
130    pub fn excluded_item_indices(&self) -> Vec<usize> {
131        let mut indices = Vec::new();
132        self.collect_excluded_indices(&mut indices);
133        indices
134    }
135
136    fn collect_excluded_indices(&self, indices: &mut Vec<usize>) {
137        if !self.included
138            && let NodeKind::Item { index } = &self.kind
139        {
140            indices.push(*index);
141            // All descendants of an excluded node are also excluded
142        } else if self.included {
143            for child in &self.children {
144                child.collect_excluded_indices(indices);
145            }
146        }
147    }
148}
149
150// ============================================================================
151// ID Generator
152// ============================================================================
153
154/// Unique ID generator for tree nodes.
155struct IdGen(usize);
156
157impl IdGen {
158    fn new() -> Self {
159        Self(0)
160    }
161
162    fn next(&mut self) -> usize {
163        let id = self.0;
164        self.0 += 1;
165        id
166    }
167}
168
169// ============================================================================
170// Builders
171// ============================================================================
172
173/// Build a tree from a list of issues.
174///
175/// Structure: Root -> [Item(0), Item(1), ...].
176/// Each Item may contain a Field("description") if the description is long.
177pub fn build_issues_tree(issues: &[Issue]) -> TrimNode {
178    let mut id_gen = IdGen::new();
179    let mut root = TrimNode::new(id_gen.next(), NodeKind::Root, 0);
180
181    for (i, issue) in issues.iter().enumerate() {
182        let item_weight = estimate_item_tokens(issue);
183        let mut item = TrimNode::new(id_gen.next(), NodeKind::Item { index: i }, item_weight);
184
185        // If description > 100 chars -- extract into a separate Field for granular trimming
186        if let Some(desc) = &issue.description
187            && desc.len() > 100
188        {
189            let desc_weight = estimate_tokens(desc);
190            item.weight = item.weight.saturating_sub(desc_weight);
191            let field = TrimNode::new(
192                id_gen.next(),
193                NodeKind::Field {
194                    name: "description".into(),
195                },
196                desc_weight,
197            );
198            item.children.push(field);
199        }
200
201        root.children.push(item);
202    }
203
204    root
205}
206
207pub fn build_merge_requests_tree(mrs: &[MergeRequest]) -> TrimNode {
208    let mut id_gen = IdGen::new();
209    let mut root = TrimNode::new(id_gen.next(), NodeKind::Root, 0);
210
211    for (i, mr) in mrs.iter().enumerate() {
212        let item_weight = estimate_item_tokens(mr);
213        let mut item = TrimNode::new(id_gen.next(), NodeKind::Item { index: i }, item_weight);
214
215        if let Some(desc) = &mr.description
216            && desc.len() > 100
217        {
218            let desc_weight = estimate_tokens(desc);
219            item.weight = item.weight.saturating_sub(desc_weight);
220            let field = TrimNode::new(
221                id_gen.next(),
222                NodeKind::Field {
223                    name: "description".into(),
224                },
225                desc_weight,
226            );
227            item.children.push(field);
228        }
229
230        root.children.push(item);
231    }
232
233    root
234}
235
236/// Build a tree from a list of file diffs.
237///
238/// Diff content is extracted into Field("diff") for granular trimming by file type.
239pub fn build_diffs_tree(diffs: &[FileDiff]) -> TrimNode {
240    let mut id_gen = IdGen::new();
241    let mut root = TrimNode::new(id_gen.next(), NodeKind::Root, 0);
242
243    for (i, diff) in diffs.iter().enumerate() {
244        let item_weight = estimate_item_tokens(diff);
245        let mut item = TrimNode::new(id_gen.next(), NodeKind::Item { index: i }, item_weight);
246
247        // Diff content is always extracted into a separate node
248        if !diff.diff.is_empty() {
249            let diff_weight = estimate_tokens(&diff.diff);
250            item.weight = item.weight.saturating_sub(diff_weight);
251            let field = TrimNode::new(
252                id_gen.next(),
253                NodeKind::Field {
254                    name: "diff".into(),
255                },
256                diff_weight,
257            );
258            item.children.push(field);
259        }
260
261        root.children.push(item);
262    }
263
264    root
265}
266
267pub fn build_comments_tree(comments: &[Comment]) -> TrimNode {
268    let mut id_gen = IdGen::new();
269    let mut root = TrimNode::new(id_gen.next(), NodeKind::Root, 0);
270
271    for (i, comment) in comments.iter().enumerate() {
272        let item_weight = estimate_item_tokens(comment);
273        let mut item = TrimNode::new(id_gen.next(), NodeKind::Item { index: i }, item_weight);
274
275        // Body > 200 chars -- extract into a separate node
276        if comment.body.len() > 200 {
277            let body_weight = estimate_tokens(&comment.body);
278            item.weight = item.weight.saturating_sub(body_weight);
279            let field = TrimNode::new(
280                id_gen.next(),
281                NodeKind::Field {
282                    name: "body".into(),
283                },
284                body_weight,
285            );
286            item.children.push(field);
287        }
288
289        root.children.push(item);
290    }
291
292    root
293}
294
295/// Build a tree from a list of discussions.
296///
297/// Two-level structure: Root -> Discussion(Item) -> Comment(Item).
298pub fn build_discussions_tree(discussions: &[Discussion]) -> TrimNode {
299    let mut id_gen = IdGen::new();
300    let mut root = TrimNode::new(id_gen.next(), NodeKind::Root, 0);
301
302    for (i, discussion) in discussions.iter().enumerate() {
303        // Discussion weight = metadata (id, resolved, position) without comments
304        let metadata_weight = estimate_tokens(&format!(
305            "id:{} resolved:{}",
306            discussion.id, discussion.resolved
307        ));
308        let mut disc_node =
309            TrimNode::new(id_gen.next(), NodeKind::Item { index: i }, metadata_weight);
310
311        // Each comment is a separate child
312        for (j, comment) in discussion.comments.iter().enumerate() {
313            let comment_weight = estimate_item_tokens(comment);
314            let comment_node =
315                TrimNode::new(id_gen.next(), NodeKind::Item { index: j }, comment_weight);
316            disc_node.children.push(comment_node);
317        }
318
319        root.children.push(disc_node);
320    }
321
322    root
323}
324
325/// Estimate tokens for a single element via TOON encode.
326fn estimate_item_tokens<T: serde::Serialize>(item: &T) -> usize {
327    match toon::encode_value(item) {
328        Ok(encoded) => estimate_tokens(&encoded),
329        Err(_) => {
330            // Fallback: estimate via JSON
331            match serde_json::to_string(item) {
332                Ok(json) => estimate_tokens(&json),
333                Err(_) => 50, // minimum estimate
334            }
335        }
336    }
337}
338
339#[cfg(test)]
340mod tests {
341    use super::*;
342    use devboy_core::User;
343
344    fn sample_issues(n: usize) -> Vec<Issue> {
345        (0..n)
346            .map(|i| Issue {
347                key: format!("gh#{}", i + 1),
348                title: format!("Issue {}", i + 1),
349                description: if i % 2 == 0 {
350                    Some("A".repeat(200)) // long description
351                } else {
352                    Some("Short desc".into())
353                },
354                state: "open".into(),
355                source: "github".into(),
356                priority: None,
357                labels: vec!["bug".into()],
358                author: Some(User {
359                    id: format!("{}", i),
360                    username: format!("user{}", i),
361                    name: None,
362                    email: None,
363                    avatar_url: None,
364                }),
365                assignees: vec![],
366                url: Some(format!("https://github.com/test/repo/issues/{}", i + 1)),
367                created_at: Some("2024-01-01T00:00:00Z".into()),
368                updated_at: Some("2024-01-02T00:00:00Z".into()),
369                attachments_count: None,
370                parent: None,
371                subtasks: vec![],
372            })
373            .collect()
374    }
375
376    fn sample_diffs(n: usize) -> Vec<FileDiff> {
377        (0..n)
378            .map(|i| FileDiff {
379                file_path: format!("src/file_{}.rs", i),
380                old_path: None,
381                new_file: false,
382                deleted_file: false,
383                renamed_file: false,
384                diff: format!("+added line {}\n-removed line {}", i, i),
385                additions: Some(1),
386                deletions: Some(1),
387            })
388            .collect()
389    }
390
391    fn sample_comments(n: usize) -> Vec<Comment> {
392        (0..n)
393            .map(|i| Comment {
394                id: format!("{}", i),
395                body: format!("Comment body {}", i),
396                author: None,
397                created_at: Some("2024-01-01T00:00:00Z".into()),
398                updated_at: None,
399                position: None,
400            })
401            .collect()
402    }
403
404    fn sample_discussions(n: usize) -> Vec<Discussion> {
405        (0..n)
406            .map(|i| Discussion {
407                id: format!("{}", i),
408                resolved: i % 2 == 0,
409                resolved_by: None,
410                comments: vec![
411                    Comment {
412                        id: format!("c{}a", i),
413                        body: format!("First comment in discussion {}", i),
414                        author: None,
415                        created_at: None,
416                        updated_at: None,
417                        position: None,
418                    },
419                    Comment {
420                        id: format!("c{}b", i),
421                        body: format!("Reply in discussion {}", i),
422                        author: None,
423                        created_at: None,
424                        updated_at: None,
425                        position: None,
426                    },
427                ],
428                position: None,
429            })
430            .collect()
431    }
432
433    // --- Structure tests ---
434
435    #[test]
436    fn test_build_issues_tree_structure() {
437        let issues = sample_issues(5);
438        let tree = build_issues_tree(&issues);
439
440        assert_eq!(tree.kind, NodeKind::Root);
441        assert_eq!(tree.children.len(), 5);
442        assert!(tree.weight == 0); // root has no own weight
443
444        // Each child is an Item
445        for (i, child) in tree.children.iter().enumerate() {
446            assert_eq!(child.kind, NodeKind::Item { index: i });
447            assert!(child.weight > 0);
448            assert!(child.included);
449        }
450    }
451
452    #[test]
453    fn test_build_issues_tree_with_description_fields() {
454        let issues = sample_issues(4);
455        let tree = build_issues_tree(&issues);
456
457        // Issues 0, 2 have long description → Field child
458        assert!(
459            !tree.children[0].children.is_empty(),
460            "Issue 0 should have description field"
461        );
462        assert!(
463            tree.children[1].children.is_empty(),
464            "Issue 1 should not have description field (short)"
465        );
466        assert!(!tree.children[2].children.is_empty());
467        assert!(tree.children[3].children.is_empty());
468    }
469
470    #[test]
471    fn test_build_diffs_tree_structure() {
472        let diffs = sample_diffs(3);
473        let tree = build_diffs_tree(&diffs);
474
475        assert_eq!(tree.children.len(), 3);
476        // Each diff has diff content as a Field child
477        for child in &tree.children {
478            assert_eq!(child.children.len(), 1);
479            assert_eq!(
480                child.children[0].kind,
481                NodeKind::Field {
482                    name: "diff".into()
483                }
484            );
485        }
486    }
487
488    #[test]
489    fn test_build_comments_tree_structure() {
490        let comments = sample_comments(5);
491        let tree = build_comments_tree(&comments);
492
493        assert_eq!(tree.children.len(), 5);
494        // Short comments — no child fields
495        for child in &tree.children {
496            assert!(child.children.is_empty());
497        }
498    }
499
500    #[test]
501    fn test_build_discussions_tree_structure() {
502        let discussions = sample_discussions(3);
503        let tree = build_discussions_tree(&discussions);
504
505        assert_eq!(tree.children.len(), 3);
506        // Each discussion has 2 comment children
507        for disc in &tree.children {
508            assert_eq!(disc.children.len(), 2);
509        }
510    }
511
512    #[test]
513    fn test_build_merge_requests_tree() {
514        let mrs: Vec<MergeRequest> = (0..3)
515            .map(|i| MergeRequest {
516                key: format!("pr#{}", i),
517                title: format!("PR {}", i),
518                description: Some("A".repeat(200)),
519                state: "open".into(),
520                source: "github".into(),
521                source_branch: "feat".into(),
522                target_branch: "main".into(),
523                author: None,
524                assignees: vec![],
525                reviewers: vec![],
526                labels: vec![],
527                draft: false,
528                url: None,
529                created_at: None,
530                updated_at: None,
531            })
532            .collect();
533
534        let tree = build_merge_requests_tree(&mrs);
535        assert_eq!(tree.children.len(), 3);
536        // Long description → Field child
537        for child in &tree.children {
538            assert!(!child.children.is_empty());
539        }
540    }
541
542    // --- Counting tests ---
543
544    #[test]
545    fn test_count_nodes() {
546        let issues = sample_issues(5);
547        let tree = build_issues_tree(&issues);
548
549        // Root + 5 items + some description fields
550        assert!(tree.count_nodes() >= 6);
551    }
552
553    #[test]
554    fn test_total_weight() {
555        let issues = sample_issues(3);
556        let tree = build_issues_tree(&issues);
557
558        let total = tree.total_weight();
559        assert!(total > 0);
560
561        // Weight should be sum of all children
562        let manual_sum: usize = tree.children.iter().map(|c| c.total_weight()).sum();
563        assert_eq!(total, manual_sum); // root weight is 0
564    }
565
566    #[test]
567    fn test_included_items_count() {
568        let issues = sample_issues(5);
569        let mut tree = build_issues_tree(&issues);
570
571        assert_eq!(tree.included_items_count(), 5);
572
573        // Exclude 2 items
574        tree.children[1].included = false;
575        tree.children[3].included = false;
576
577        assert_eq!(tree.included_items_count(), 3);
578    }
579
580    #[test]
581    fn test_included_excluded_indices() {
582        let issues = sample_issues(5);
583        let mut tree = build_issues_tree(&issues);
584
585        tree.children[1].included = false;
586        tree.children[3].included = false;
587
588        let included = tree.included_item_indices();
589        let excluded = tree.excluded_item_indices();
590
591        assert_eq!(included, vec![0, 2, 4]);
592        assert_eq!(excluded, vec![1, 3]);
593    }
594
595    // --- Weight correctness ---
596
597    #[test]
598    fn test_weights_are_positive() {
599        let issues = sample_issues(10);
600        let tree = build_issues_tree(&issues);
601
602        for child in &tree.children {
603            assert!(
604                child.weight > 0 || !child.children.is_empty(),
605                "Item should have weight or children with weight"
606            );
607            assert!(child.total_weight() > 0);
608        }
609    }
610
611    #[test]
612    fn test_total_weight_decreases_when_excluded() {
613        let issues = sample_issues(5);
614        let mut tree = build_issues_tree(&issues);
615
616        let full_weight = tree.total_weight();
617        tree.children[0].included = false;
618        let reduced_weight = tree.total_weight();
619
620        assert!(reduced_weight < full_weight);
621    }
622
623    // --- Density ---
624
625    #[test]
626    fn test_density_calculation() {
627        let mut node = TrimNode::new(0, NodeKind::Item { index: 0 }, 100);
628        node.value = 0.5;
629        assert!((node.density() - 0.005).abs() < 0.0001);
630
631        let zero_node = TrimNode::new(1, NodeKind::Item { index: 1 }, 0);
632        assert_eq!(zero_node.density(), 0.0);
633    }
634}