// devboy_format_pipeline/toon.rs

//! TOON (Token-Oriented Object Notation) encoding for tool output.
//!
//! Wrappers around `toon_format::encode` with settings tuned for LLM consumption.
//! Supports three detail levels (`TrimLevel`) for controlling
//! the number of fields during budget trimming.

use devboy_core::{Comment, Discussion, FileDiff, Issue, MergeRequest, Result};
use serde::Serialize;
use toon_format::EncodeOptions;
use toon_format::types::KeyFoldingMode;

/// Detail level for TOON encoding.
///
/// Controls which fields the issue and merge-request encoders include in the
/// output. Used by the budget pipeline for progressive detail reduction: each
/// successive variant keeps fewer fields than the one before it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TrimLevel {
    /// All fields (~750 tokens/issue).
    Full,
    /// Core fields, without timestamps and avatar_url (~400 tokens/issue).
    Standard,
    /// Only key fields: key, title, state (~150 tokens/issue).
    ///
    /// Merge requests additionally keep their source and target branches.
    Minimal,
}

26/// Optimal TOON settings for LLM: minimal indent, key folding.
27fn default_opts() -> EncodeOptions {
28    EncodeOptions::new()
29        .with_spaces(1)
30        .with_key_folding(KeyFoldingMode::Safe)
31}
32
33/// Encode any Serialize type into TOON.
34pub fn encode_value<T: Serialize>(value: &T) -> Result<String> {
35    toon_format::encode(value, &default_opts())
36        .map_err(|e| devboy_core::Error::Other(anyhow::anyhow!("TOON encode: {e}")))
37}
38
39/// Encode an array of issues into TOON with the specified detail level.
40pub fn encode_issues(issues: &[Issue], level: TrimLevel) -> Result<String> {
41    match level {
42        TrimLevel::Full => encode_value(&issues),
43        TrimLevel::Standard => {
44            let views: Vec<IssueStandard> = issues.iter().map(IssueStandard::from).collect();
45            encode_value(&views)
46        }
47        TrimLevel::Minimal => {
48            let views: Vec<IssueMinimal> = issues.iter().map(IssueMinimal::from).collect();
49            encode_value(&views)
50        }
51    }
52}
53
54/// Encode an array of merge requests into TOON with the specified detail level.
55pub fn encode_merge_requests(mrs: &[MergeRequest], level: TrimLevel) -> Result<String> {
56    match level {
57        TrimLevel::Full => encode_value(&mrs),
58        TrimLevel::Standard => {
59            let views: Vec<MrStandard> = mrs.iter().map(MrStandard::from).collect();
60            encode_value(&views)
61        }
62        TrimLevel::Minimal => {
63            let views: Vec<MrMinimal> = mrs.iter().map(MrMinimal::from).collect();
64            encode_value(&views)
65        }
66    }
67}
68
69/// Encode an array of file diffs into TOON.
70pub fn encode_diffs(diffs: &[FileDiff]) -> Result<String> {
71    encode_value(&diffs)
72}
73
74/// Encode an array of comments into TOON.
75pub fn encode_comments(comments: &[Comment]) -> Result<String> {
76    encode_value(&comments)
77}
78
79/// Encode an array of discussions into TOON.
80pub fn encode_discussions(discussions: &[Discussion]) -> Result<String> {
81    encode_value(&discussions)
82}
83
// ============================================================================
// View structs for Standard and Minimal levels
// ============================================================================

88/// Issue at Standard level -- without timestamps and avatar.
89#[derive(Serialize)]
90struct IssueStandard<'a> {
91    key: &'a str,
92    title: &'a str,
93    state: &'a str,
94    source: &'a str,
95    #[serde(skip_serializing_if = "Option::is_none")]
96    priority: Option<&'a str>,
97    #[serde(skip_serializing_if = "Vec::is_empty")]
98    labels: &'a Vec<String>,
99    #[serde(skip_serializing_if = "Option::is_none")]
100    description: Option<&'a str>,
101    #[serde(skip_serializing_if = "Option::is_none")]
102    author: Option<&'a str>,
103    #[serde(skip_serializing_if = "Option::is_none")]
104    url: Option<&'a str>,
105}
106
107impl<'a> From<&'a Issue> for IssueStandard<'a> {
108    fn from(i: &'a Issue) -> Self {
109        Self {
110            key: &i.key,
111            title: &i.title,
112            state: &i.state,
113            source: &i.source,
114            priority: i.priority.as_deref(),
115            labels: &i.labels,
116            description: i.description.as_deref(),
117            author: i.author.as_ref().map(|u| u.username.as_str()),
118            url: i.url.as_deref(),
119        }
120    }
121}
122
123/// Issue at Minimal level -- only key, title, state.
124#[derive(Serialize)]
125struct IssueMinimal<'a> {
126    key: &'a str,
127    title: &'a str,
128    state: &'a str,
129}
130
131impl<'a> From<&'a Issue> for IssueMinimal<'a> {
132    fn from(i: &'a Issue) -> Self {
133        Self {
134            key: &i.key,
135            title: &i.title,
136            state: &i.state,
137        }
138    }
139}
140
141/// MergeRequest at Standard level.
142#[derive(Serialize)]
143struct MrStandard<'a> {
144    key: &'a str,
145    title: &'a str,
146    state: &'a str,
147    source: &'a str,
148    source_branch: &'a str,
149    target_branch: &'a str,
150    draft: bool,
151    #[serde(skip_serializing_if = "Vec::is_empty")]
152    labels: &'a Vec<String>,
153    #[serde(skip_serializing_if = "Option::is_none")]
154    description: Option<&'a str>,
155    #[serde(skip_serializing_if = "Option::is_none")]
156    author: Option<&'a str>,
157    #[serde(skip_serializing_if = "Option::is_none")]
158    url: Option<&'a str>,
159}
160
161impl<'a> From<&'a MergeRequest> for MrStandard<'a> {
162    fn from(mr: &'a MergeRequest) -> Self {
163        Self {
164            key: &mr.key,
165            title: &mr.title,
166            state: &mr.state,
167            source: &mr.source,
168            source_branch: &mr.source_branch,
169            target_branch: &mr.target_branch,
170            draft: mr.draft,
171            labels: &mr.labels,
172            description: mr.description.as_deref(),
173            author: mr.author.as_ref().map(|u| u.username.as_str()),
174            url: mr.url.as_deref(),
175        }
176    }
177}
178
179/// MergeRequest at Minimal level.
180#[derive(Serialize)]
181struct MrMinimal<'a> {
182    key: &'a str,
183    title: &'a str,
184    state: &'a str,
185    source_branch: &'a str,
186    target_branch: &'a str,
187}
188
189impl<'a> From<&'a MergeRequest> for MrMinimal<'a> {
190    fn from(mr: &'a MergeRequest) -> Self {
191        Self {
192            key: &mr.key,
193            title: &mr.title,
194            state: &mr.state,
195            source_branch: &mr.source_branch,
196            target_branch: &mr.target_branch,
197        }
198    }
199}
200
#[cfg(test)]
mod tests {
    use super::*;
    use devboy_core::User;

    /// A fully populated issue used by the per-level encoding tests.
    fn sample_issue() -> Issue {
        Issue {
            key: "gh#1".into(),
            title: "Fix login bug".into(),
            description: Some("Users cannot login with SSO".into()),
            state: "open".into(),
            source: "github".into(),
            priority: Some("high".into()),
            labels: vec!["bug".into(), "auth".into()],
            author: Some(User {
                id: "1".into(),
                username: "alice".into(),
                name: Some("Alice Smith".into()),
                email: None,
                avatar_url: None,
            }),
            assignees: vec![],
            url: Some("https://github.com/test/repo/issues/1".into()),
            created_at: Some("2024-01-01T00:00:00Z".into()),
            updated_at: Some("2024-01-02T00:00:00Z".into()),
            attachments_count: None,
            parent: None,
            subtasks: vec![],
        }
    }

    /// A fully populated merge request used by the per-level encoding tests.
    fn sample_mr() -> MergeRequest {
        MergeRequest {
            key: "pr#42".into(),
            title: "Add SSO support".into(),
            description: Some("Implements SAML-based SSO".into()),
            state: "open".into(),
            source: "github".into(),
            source_branch: "feat/sso".into(),
            target_branch: "main".into(),
            author: Some(User {
                id: "2".into(),
                username: "bob".into(),
                name: None,
                email: None,
                avatar_url: None,
            }),
            assignees: vec![],
            reviewers: vec![],
            labels: vec!["feature".into()],
            draft: false,
            url: Some("https://github.com/test/repo/pull/42".into()),
            created_at: Some("2024-01-01T00:00:00Z".into()),
            updated_at: Some("2024-01-02T00:00:00Z".into()),
        }
    }

    #[test]
    fn test_encode_issues_full() {
        let issues = vec![sample_issue()];
        let result = encode_issues(&issues, TrimLevel::Full).unwrap();
        assert!(result.contains("gh#1"));
        assert!(result.contains("Fix login bug"));
        assert!(result.contains("2024-01-01")); // timestamps present in Full
    }

    #[test]
    fn test_encode_issues_standard() {
        let issues = vec![sample_issue()];
        let result = encode_issues(&issues, TrimLevel::Standard).unwrap();
        assert!(result.contains("gh#1"));
        assert!(result.contains("Fix login bug"));
        assert!(!result.contains("2024-01-01")); // no timestamps
        assert!(!result.contains("avatar")); // no avatar
    }

    #[test]
    fn test_encode_issues_minimal() {
        let issues = vec![sample_issue()];
        let result = encode_issues(&issues, TrimLevel::Minimal).unwrap();
        assert!(result.contains("gh#1"));
        assert!(result.contains("Fix login bug"));
        assert!(result.contains("open"));
        assert!(!result.contains("github")); // no source
        assert!(!result.contains("alice")); // no author
    }

    #[test]
    fn test_encode_merge_requests_full() {
        let mrs = vec![sample_mr()];
        let result = encode_merge_requests(&mrs, TrimLevel::Full).unwrap();
        assert!(result.contains("pr#42"));
        assert!(result.contains("Add SSO support"));
    }

    #[test]
    fn test_encode_merge_requests_standard() {
        let mrs = vec![sample_mr()];
        let result = encode_merge_requests(&mrs, TrimLevel::Standard).unwrap();
        assert!(result.contains("pr#42"));
        assert!(result.contains("Add SSO support"));
        assert!(result.contains("feat/sso"));
        assert!(!result.contains("2024-01-01")); // no timestamps
    }

    #[test]
    fn test_encode_merge_requests_minimal() {
        let mrs = vec![sample_mr()];
        let result = encode_merge_requests(&mrs, TrimLevel::Minimal).unwrap();
        assert!(result.contains("pr#42"));
        assert!(result.contains("feat/sso"));
        assert!(!result.contains("bob")); // no author
    }

    #[test]
    fn test_encode_diffs() {
        let diffs = vec![FileDiff {
            file_path: "src/main.rs".into(),
            old_path: None,
            new_file: false,
            deleted_file: false,
            renamed_file: false,
            diff: "+added line\n-removed line".into(),
            additions: Some(1),
            deletions: Some(1),
        }];
        let result = encode_diffs(&diffs).unwrap();
        assert!(result.contains("src/main.rs"));
        assert!(result.contains("added line"));
    }

    #[test]
    fn test_encode_comments() {
        let comments = vec![Comment {
            id: "c1".into(),
            body: "LGTM!".into(),
            author: None,
            created_at: None,
            updated_at: None,
            position: None,
        }];
        let result = encode_comments(&comments).unwrap();
        assert!(result.contains("LGTM!"));
    }

    #[test]
    fn test_encode_discussions() {
        let discussions = vec![Discussion {
            id: "d1".into(),
            resolved: false,
            resolved_by: None,
            comments: vec![Comment {
                id: "c1".into(),
                body: "Needs review".into(),
                author: None,
                created_at: None,
                updated_at: None,
                position: None,
            }],
            position: None,
        }];
        let result = encode_discussions(&discussions).unwrap();
        assert!(result.contains("Needs review"));
    }

    #[test]
    fn test_toon_smaller_than_json() {
        let issues: Vec<Issue> = (1..=10)
            .map(|i| Issue {
                key: format!("gh#{i}"),
                title: format!("Issue {i}"),
                description: Some(format!("Description for issue {i}")),
                state: "open".into(),
                source: "github".into(),
                priority: None,
                labels: vec!["bug".into()],
                author: Some(User {
                    id: format!("{i}"),
                    username: format!("user{i}"),
                    name: None,
                    email: None,
                    avatar_url: None,
                }),
                assignees: vec![],
                url: Some(format!("https://github.com/test/repo/issues/{i}")),
                created_at: Some("2024-01-01T00:00:00Z".into()),
                updated_at: Some("2024-01-02T00:00:00Z".into()),
                attachments_count: None,
                parent: None,
                subtasks: vec![],
            })
            .collect();

        let json = serde_json::to_string_pretty(&issues).unwrap();
        let toon = encode_issues(&issues, TrimLevel::Full).unwrap();

        // TOON should be more compact than JSON
        assert!(
            toon.len() < json.len(),
            "TOON ({}) should be smaller than JSON ({})",
            toon.len(),
            json.len()
        );
    }

    #[test]
    fn test_minimal_much_smaller_than_full() {
        let issues: Vec<Issue> = (1..=5)
            .map(|i| Issue {
                key: format!("gh#{i}"),
                title: format!("Issue {i}"),
                description: Some(
                    "A long description that takes many tokens and should be excluded in minimal mode"
                        .into(),
                ),
                state: "open".into(),
                source: "github".into(),
                priority: Some("high".into()),
                labels: vec!["bug".into(), "urgent".into()],
                author: Some(User {
                    id: format!("{i}"),
                    username: format!("user{i}"),
                    name: Some(format!("User {i}")),
                    email: Some(format!("user{i}@example.com")),
                    avatar_url: Some("https://example.com/avatar.png".into()),
                }),
                assignees: vec![],
                url: Some(format!("https://github.com/test/repo/issues/{i}")),
                created_at: Some("2024-01-01T00:00:00Z".into()),
                updated_at: Some("2024-01-02T00:00:00Z".into()),
                attachments_count: None,
                parent: None,
                subtasks: vec![],
            })
            .collect();

        let full = encode_issues(&issues, TrimLevel::Full).unwrap();
        let minimal = encode_issues(&issues, TrimLevel::Minimal).unwrap();

        // Minimal should be significantly smaller than Full
        assert!(
            minimal.len() * 3 < full.len(),
            "Minimal ({}) should be at least 3x smaller than Full ({})",
            minimal.len(),
            full.len()
        );
    }
}