use devboy_core::{Comment, Discussion, FileDiff, Issue, MergeRequest};
use crate::token_counter::estimate_tokens;
use crate::toon;
/// A node of the trimming tree.
///
/// Every node carries its own `weight` (cost) and `value` (importance) plus an
/// `included` flag. Excluding a node prunes its whole subtree: the aggregate
/// queries on this type ignore everything below an excluded node.
#[derive(Debug, Clone)]
pub struct TrimNode {
    /// Identifier supplied by whoever builds the tree.
    pub id: usize,
    /// What this node represents (root, item, field, text).
    pub kind: NodeKind,
    /// Cost of this node alone, not counting its children.
    pub weight: usize,
    /// Importance score; [`TrimNode::new`] initialises it to `1.0`.
    pub value: f64,
    /// Nested nodes (for example the fields of an item).
    pub children: Vec<TrimNode>,
    /// Whether this node, and therefore its subtree, is currently kept.
    pub included: bool,
}

/// The role a [`TrimNode`] plays inside the tree.
#[derive(Debug, Clone, PartialEq)]
pub enum NodeKind {
    /// Top of the tree.
    Root,
    /// One element of a collection.
    Item {
        /// Position of the element within the collection it was built from.
        index: usize,
    },
    /// A named part of an item.
    Field { name: String },
    /// A text node.
    Text,
}

impl TrimNode {
    /// Creates an included node without children and with `value` set to `1.0`.
    pub fn new(id: usize, kind: NodeKind, weight: usize) -> Self {
        Self {
            id,
            kind,
            weight,
            value: 1.0,
            children: Vec::new(),
            included: true,
        }
    }

    /// Number of nodes in this subtree, including `self`, regardless of the
    /// `included` flags.
    pub fn count_nodes(&self) -> usize {
        1 + self
            .children
            .iter()
            .map(TrimNode::count_nodes)
            .sum::<usize>()
    }

    /// Sum of the weights of all kept nodes in this subtree.
    ///
    /// Returns `0` when `self` is excluded.
    pub fn total_weight(&self) -> usize {
        if !self.included {
            return 0;
        }
        self.weight
            + self
                .children
                .iter()
                .map(TrimNode::total_weight)
                .sum::<usize>()
    }

    /// Sum of `value * weight` over all kept nodes in this subtree.
    ///
    /// Returns `0.0` when `self` is excluded.
    pub fn total_value(&self) -> f64 {
        if !self.included {
            return 0.0;
        }
        self.value * self.weight as f64
            + self
                .children
                .iter()
                .map(TrimNode::total_value)
                .sum::<f64>()
    }

    /// `value / weight` of this node alone; `0.0` for a zero-weight node so the
    /// division can never produce `inf` or `NaN`.
    pub fn density(&self) -> f64 {
        if self.weight == 0 {
            return 0.0;
        }
        self.value / self.weight as f64
    }

    /// Number of kept [`NodeKind::Item`] nodes in this subtree.
    ///
    /// An excluded node contributes nothing, not even its descendants. This
    /// keeps the count equal to `included_item_indices().len()` and in line
    /// with how [`TrimNode::total_weight`] treats excluded subtrees.
    pub fn included_items_count(&self) -> usize {
        if !self.included {
            return 0;
        }
        let own = usize::from(matches!(self.kind, NodeKind::Item { .. }));
        own + self
            .children
            .iter()
            .map(TrimNode::included_items_count)
            .sum::<usize>()
    }

    /// Indices of all kept `Item` nodes, in depth-first pre-order.
    pub fn included_item_indices(&self) -> Vec<usize> {
        let mut indices = Vec::new();
        self.collect_included_indices(&mut indices);
        indices
    }

    /// Pre-order walk that records kept items and stops at excluded nodes.
    fn collect_included_indices(&self, indices: &mut Vec<usize>) {
        if !self.included {
            return;
        }
        if let NodeKind::Item { index } = &self.kind {
            indices.push(*index);
        }
        for child in &self.children {
            child.collect_included_indices(indices);
        }
    }

    /// Indices of the excluded `Item` nodes reachable through kept ancestors.
    ///
    /// The walk does not descend below an excluded node, so items nested
    /// inside an excluded item are not reported separately.
    pub fn excluded_item_indices(&self) -> Vec<usize> {
        let mut indices = Vec::new();
        self.collect_excluded_indices(&mut indices);
        indices
    }

    /// Walks kept nodes and records the first excluded item on each path.
    fn collect_excluded_indices(&self, indices: &mut Vec<usize>) {
        if self.included {
            for child in &self.children {
                child.collect_excluded_indices(indices);
            }
        } else if let NodeKind::Item { index } = &self.kind {
            indices.push(*index);
        }
    }
}
/// Hands out consecutive node ids, starting at 0.
struct IdGen(usize);

impl IdGen {
    /// Creates a generator whose first id is 0.
    fn new() -> Self {
        IdGen(0)
    }

    /// Returns the current id and advances the counter by one.
    fn next(&mut self) -> usize {
        let issued = self.0;
        self.0 = issued + 1;
        issued
    }
}
/// Builds a trim tree with one `Item` node per issue.
///
/// The root has weight 0. Each item is weighted with the token estimate of the
/// whole issue. A description longer than 100 bytes is split out into a
/// `"description"` field child, and its estimate is taken off the item's own
/// weight so it is not counted twice.
pub fn build_issues_tree(issues: &[Issue]) -> TrimNode {
    let mut ids = IdGen::new();
    let mut root = TrimNode::new(ids.next(), NodeKind::Root, 0);

    root.children
        .extend(issues.iter().enumerate().map(|(index, issue)| {
            let full_cost = estimate_item_tokens(issue);
            let mut node = TrimNode::new(ids.next(), NodeKind::Item { index }, full_cost);

            let long_description = issue
                .description
                .as_ref()
                .filter(|text| text.len() > 100);
            if let Some(text) = long_description {
                let text_cost = estimate_tokens(text);
                // The two estimates are computed independently, so clamp at zero.
                node.weight = node.weight.saturating_sub(text_cost);
                let kind = NodeKind::Field {
                    name: "description".into(),
                };
                node.children.push(TrimNode::new(ids.next(), kind, text_cost));
            }
            node
        }));

    root
}
/// Builds a trim tree with one `Item` node per merge request.
///
/// Mirrors the issue tree: the root weighs 0, each item carries the token
/// estimate of the whole merge request, and a description longer than 100
/// bytes becomes a separate `"description"` field child whose estimate is
/// subtracted from the item's own weight.
pub fn build_merge_requests_tree(mrs: &[MergeRequest]) -> TrimNode {
    let mut ids = IdGen::new();
    let mut root = TrimNode::new(ids.next(), NodeKind::Root, 0);

    root.children
        .extend(mrs.iter().enumerate().map(|(index, request)| {
            let full_cost = estimate_item_tokens(request);
            let mut node = TrimNode::new(ids.next(), NodeKind::Item { index }, full_cost);

            let long_description = request
                .description
                .as_ref()
                .filter(|text| text.len() > 100);
            if let Some(text) = long_description {
                let text_cost = estimate_tokens(text);
                // The two estimates are computed independently, so clamp at zero.
                node.weight = node.weight.saturating_sub(text_cost);
                let kind = NodeKind::Field {
                    name: "description".into(),
                };
                node.children.push(TrimNode::new(ids.next(), kind, text_cost));
            }
            node
        }));

    root
}
/// Builds a trim tree with one `Item` node per file diff.
///
/// Every non-empty diff text is split out into a `"diff"` field child; its
/// token estimate is removed from the item's own weight, leaving the item
/// with whatever the rest of the record costs.
pub fn build_diffs_tree(diffs: &[FileDiff]) -> TrimNode {
    let mut ids = IdGen::new();
    let mut root = TrimNode::new(ids.next(), NodeKind::Root, 0);

    root.children
        .extend(diffs.iter().enumerate().map(|(index, file)| {
            let full_cost = estimate_item_tokens(file);
            let mut node = TrimNode::new(ids.next(), NodeKind::Item { index }, full_cost);

            let patch = &file.diff;
            if !patch.is_empty() {
                let patch_cost = estimate_tokens(patch);
                // The two estimates are computed independently, so clamp at zero.
                node.weight = node.weight.saturating_sub(patch_cost);
                let kind = NodeKind::Field {
                    name: "diff".into(),
                };
                node.children.push(TrimNode::new(ids.next(), kind, patch_cost));
            }
            node
        }));

    root
}
/// Builds a trim tree with one `Item` node per comment.
///
/// A body longer than 200 bytes is split out into a `"body"` field child and
/// its token estimate is taken off the item's own weight. Shorter bodies stay
/// part of the item and produce no child.
pub fn build_comments_tree(comments: &[Comment]) -> TrimNode {
    let mut ids = IdGen::new();
    let mut root = TrimNode::new(ids.next(), NodeKind::Root, 0);

    root.children
        .extend(comments.iter().enumerate().map(|(index, comment)| {
            let full_cost = estimate_item_tokens(comment);
            let mut node = TrimNode::new(ids.next(), NodeKind::Item { index }, full_cost);

            let body = &comment.body;
            if body.len() > 200 {
                let body_cost = estimate_tokens(body);
                // The two estimates are computed independently, so clamp at zero.
                node.weight = node.weight.saturating_sub(body_cost);
                let kind = NodeKind::Field {
                    name: "body".into(),
                };
                node.children.push(TrimNode::new(ids.next(), kind, body_cost));
            }
            node
        }));

    root
}
/// Builds a two-level trim tree: discussions, each holding its comments.
///
/// A discussion node is an `Item` indexed by its position in `discussions`
/// and weighted by the token estimate of a short `id`/`resolved` summary
/// line. Each of its comments becomes a child `Item` indexed by its position
/// inside that discussion and weighted by the estimate of the whole comment.
pub fn build_discussions_tree(discussions: &[Discussion]) -> TrimNode {
    let mut ids = IdGen::new();
    let mut root = TrimNode::new(ids.next(), NodeKind::Root, 0);

    for (index, thread) in discussions.iter().enumerate() {
        let summary = format!("id:{} resolved:{}", thread.id, thread.resolved);
        let summary_cost = estimate_tokens(&summary);
        let mut thread_node = TrimNode::new(ids.next(), NodeKind::Item { index }, summary_cost);

        thread_node
            .children
            .extend(thread.comments.iter().enumerate().map(|(reply_index, reply)| {
                let reply_cost = estimate_item_tokens(reply);
                TrimNode::new(ids.next(), NodeKind::Item { index: reply_index }, reply_cost)
            }));

        root.children.push(thread_node);
    }

    root
}
/// Estimates the token cost of a serializable item.
///
/// Tries the TOON encoding first, falls back to JSON when that fails, and
/// finally to a flat guess of 50 tokens when neither encoder succeeds.
fn estimate_item_tokens<T: serde::Serialize>(item: &T) -> usize {
    if let Ok(encoded) = toon::encode_value(item) {
        return estimate_tokens(&encoded);
    }
    serde_json::to_string(item).map_or(50, |json| estimate_tokens(&json))
}
#[cfg(test)]
mod tests {
    use super::*;
    use devboy_core::User;

    /// Builds `n` issues; even positions get a 200-byte description (long
    /// enough to be split into a field), odd positions a short one.
    fn sample_issues(n: usize) -> Vec<Issue> {
        let mut issues = Vec::with_capacity(n);
        for i in 0..n {
            let number = i + 1;
            let description = if i % 2 == 0 {
                "A".repeat(200)
            } else {
                "Short desc".into()
            };
            let author = User {
                id: i.to_string(),
                username: format!("user{}", i),
                name: None,
                email: None,
                avatar_url: None,
            };
            issues.push(Issue {
                key: format!("gh#{}", number),
                title: format!("Issue {}", number),
                description: Some(description),
                state: "open".into(),
                source: "github".into(),
                priority: None,
                labels: vec!["bug".into()],
                author: Some(author),
                assignees: vec![],
                url: Some(format!("https://github.com/test/repo/issues/{}", number)),
                created_at: Some("2024-01-01T00:00:00Z".into()),
                updated_at: Some("2024-01-02T00:00:00Z".into()),
                attachments_count: None,
                parent: None,
                subtasks: vec![],
                custom_fields: std::collections::HashMap::new(),
            });
        }
        issues
    }

    /// Builds `n` file diffs, each with a small non-empty diff text.
    fn sample_diffs(n: usize) -> Vec<FileDiff> {
        let mut diffs = Vec::with_capacity(n);
        for i in 0..n {
            diffs.push(FileDiff {
                file_path: format!("src/file_{}.rs", i),
                old_path: None,
                new_file: false,
                deleted_file: false,
                renamed_file: false,
                diff: format!("+added line {}\n-removed line {}", i, i),
                additions: Some(1),
                deletions: Some(1),
            });
        }
        diffs
    }

    /// Builds `n` comments with short bodies (below the split threshold).
    fn sample_comments(n: usize) -> Vec<Comment> {
        let mut comments = Vec::with_capacity(n);
        for i in 0..n {
            comments.push(Comment {
                id: i.to_string(),
                body: format!("Comment body {}", i),
                author: None,
                created_at: Some("2024-01-01T00:00:00Z".into()),
                updated_at: None,
                position: None,
            });
        }
        comments
    }

    /// A comment carrying only an id and a body.
    fn plain_comment(id: String, body: String) -> Comment {
        Comment {
            id,
            body,
            author: None,
            created_at: None,
            updated_at: None,
            position: None,
        }
    }

    /// Builds `n` discussions with two comments each; even ones are resolved.
    fn sample_discussions(n: usize) -> Vec<Discussion> {
        let mut discussions = Vec::with_capacity(n);
        for i in 0..n {
            let opening = plain_comment(
                format!("c{}a", i),
                format!("First comment in discussion {}", i),
            );
            let reply = plain_comment(format!("c{}b", i), format!("Reply in discussion {}", i));
            discussions.push(Discussion {
                id: i.to_string(),
                resolved: i % 2 == 0,
                resolved_by: None,
                comments: vec![opening, reply],
                position: None,
            });
        }
        discussions
    }

    /// The root is weightless and holds one included, weighted item per issue.
    #[test]
    fn test_build_issues_tree_structure() {
        let tree = build_issues_tree(&sample_issues(5));
        assert_eq!(tree.kind, NodeKind::Root);
        assert_eq!(tree.children.len(), 5);
        assert_eq!(tree.weight, 0);
        for (position, item) in tree.children.iter().enumerate() {
            assert_eq!(item.kind, NodeKind::Item { index: position });
            assert!(item.weight > 0);
            assert!(item.included);
        }
    }

    /// Only long descriptions are split out into a field child.
    #[test]
    fn test_build_issues_tree_with_description_fields() {
        let tree = build_issues_tree(&sample_issues(4));
        let items = &tree.children;
        assert!(
            !items[0].children.is_empty(),
            "Issue 0 should have description field"
        );
        assert!(
            items[1].children.is_empty(),
            "Issue 1 should not have description field (short)"
        );
        assert!(!items[2].children.is_empty());
        assert!(items[3].children.is_empty());
    }

    /// Every non-empty diff yields exactly one `diff` field child.
    #[test]
    fn test_build_diffs_tree_structure() {
        let tree = build_diffs_tree(&sample_diffs(3));
        assert_eq!(tree.children.len(), 3);
        let expected_kind = NodeKind::Field {
            name: "diff".into(),
        };
        for item in &tree.children {
            assert_eq!(item.children.len(), 1);
            assert_eq!(item.children[0].kind, expected_kind);
        }
    }

    /// Short comment bodies are not split into children.
    #[test]
    fn test_build_comments_tree_structure() {
        let tree = build_comments_tree(&sample_comments(5));
        assert_eq!(tree.children.len(), 5);
        assert!(tree.children.iter().all(|item| item.children.is_empty()));
    }

    /// Each discussion node holds one child per comment.
    #[test]
    fn test_build_discussions_tree_structure() {
        let tree = build_discussions_tree(&sample_discussions(3));
        assert_eq!(tree.children.len(), 3);
        for thread in &tree.children {
            assert_eq!(thread.children.len(), 2);
        }
    }

    /// Merge requests with long descriptions get a field child.
    #[test]
    fn test_build_merge_requests_tree() {
        let mut mrs: Vec<MergeRequest> = Vec::new();
        for i in 0..3 {
            mrs.push(MergeRequest {
                key: format!("pr#{}", i),
                title: format!("PR {}", i),
                description: Some("A".repeat(200)),
                state: "open".into(),
                source: "github".into(),
                source_branch: "feat".into(),
                target_branch: "main".into(),
                author: None,
                assignees: vec![],
                reviewers: vec![],
                labels: vec![],
                draft: false,
                url: None,
                created_at: None,
                updated_at: None,
            });
        }
        let tree = build_merge_requests_tree(&mrs);
        assert_eq!(tree.children.len(), 3);
        assert!(tree.children.iter().all(|item| !item.children.is_empty()));
    }

    /// At least the root plus one node per issue.
    #[test]
    fn test_count_nodes() {
        let tree = build_issues_tree(&sample_issues(5));
        assert!(tree.count_nodes() >= 6);
    }

    /// The root adds no weight of its own, so the total equals the sum of the
    /// children's totals.
    #[test]
    fn test_total_weight() {
        let tree = build_issues_tree(&sample_issues(3));
        let total = tree.total_weight();
        assert!(total > 0);
        let children_total: usize = tree.children.iter().map(TrimNode::total_weight).sum();
        assert_eq!(total, children_total);
    }

    /// Excluding items lowers the included count accordingly.
    #[test]
    fn test_included_items_count() {
        let mut tree = build_issues_tree(&sample_issues(5));
        assert_eq!(tree.included_items_count(), 5);
        for position in [1, 3] {
            tree.children[position].included = false;
        }
        assert_eq!(tree.included_items_count(), 3);
    }

    /// Included and excluded index lists partition the items.
    #[test]
    fn test_included_excluded_indices() {
        let mut tree = build_issues_tree(&sample_issues(5));
        for position in [1, 3] {
            tree.children[position].included = false;
        }
        assert_eq!(tree.included_item_indices(), vec![0, 2, 4]);
        assert_eq!(tree.excluded_item_indices(), vec![1, 3]);
    }

    /// Every item carries weight, either itself or through its children.
    #[test]
    fn test_weights_are_positive() {
        let tree = build_issues_tree(&sample_issues(10));
        for item in &tree.children {
            assert!(
                item.weight > 0 || !item.children.is_empty(),
                "Item should have weight or children with weight"
            );
            assert!(item.total_weight() > 0);
        }
    }

    /// Dropping an item strictly reduces the tree's total weight.
    #[test]
    fn test_total_weight_decreases_when_excluded() {
        let mut tree = build_issues_tree(&sample_issues(5));
        let before = tree.total_weight();
        tree.children[0].included = false;
        let after = tree.total_weight();
        assert!(after < before);
    }

    /// Density is value over weight, and zero for weightless nodes.
    #[test]
    fn test_density_calculation() {
        let mut weighted = TrimNode::new(0, NodeKind::Item { index: 0 }, 100);
        weighted.value = 0.5;
        assert!((weighted.density() - 0.005).abs() < 0.0001);

        let weightless = TrimNode::new(1, NodeKind::Item { index: 1 }, 0);
        assert_eq!(weightless.density(), 0.0);
    }
}