Skip to main content

devboy_format_pipeline/
toon.rs

//! TOON (Token-Oriented Object Notation) encoding for tool output.
//!
//! Thin wrappers around `toon_format::encode`, configured for compact, LLM-friendly output.
//! Supports three detail levels (`TrimLevel`) that control how many fields
//! are emitted while the budget pipeline trims output.
6
7use devboy_core::{Comment, Discussion, FileDiff, Issue, MergeRequest, Result};
8use serde::Serialize;
9use toon_format::EncodeOptions;
10use toon_format::types::KeyFoldingMode;
11
/// How much detail the TOON encoders keep per record.
///
/// The budget pipeline steps down through these levels to shed fields
/// progressively when the output does not fit.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TrimLevel {
    /// Every field of the source record (~750 tokens/issue).
    Full,
    /// Core fields only; timestamps and `avatar_url` are dropped (~400 tokens/issue).
    Standard,
    /// Identifying fields only: key, title, state (~150 tokens/issue).
    /// Merge requests additionally keep their source and target branches.
    Minimal,
}
25
26/// Optimal TOON settings for LLM: minimal indent, key folding.
27fn default_opts() -> EncodeOptions {
28    EncodeOptions::new()
29        .with_spaces(1)
30        .with_key_folding(KeyFoldingMode::Safe)
31}
32
33/// Encode any Serialize type into TOON.
34pub fn encode_value<T: Serialize>(value: &T) -> Result<String> {
35    toon_format::encode(value, &default_opts())
36        .map_err(|e| devboy_core::Error::Other(anyhow::anyhow!("TOON encode: {e}")))
37}
38
39/// Encode an array of issues into TOON with the specified detail level.
40pub fn encode_issues(issues: &[Issue], level: TrimLevel) -> Result<String> {
41    match level {
42        TrimLevel::Full => encode_value(&issues),
43        TrimLevel::Standard => {
44            let views: Vec<IssueStandard> = issues.iter().map(IssueStandard::from).collect();
45            encode_value(&views)
46        }
47        TrimLevel::Minimal => {
48            let views: Vec<IssueMinimal> = issues.iter().map(IssueMinimal::from).collect();
49            encode_value(&views)
50        }
51    }
52}
53
54/// Encode an array of merge requests into TOON with the specified detail level.
55pub fn encode_merge_requests(mrs: &[MergeRequest], level: TrimLevel) -> Result<String> {
56    match level {
57        TrimLevel::Full => encode_value(&mrs),
58        TrimLevel::Standard => {
59            let views: Vec<MrStandard> = mrs.iter().map(MrStandard::from).collect();
60            encode_value(&views)
61        }
62        TrimLevel::Minimal => {
63            let views: Vec<MrMinimal> = mrs.iter().map(MrMinimal::from).collect();
64            encode_value(&views)
65        }
66    }
67}
68
69/// Encode an array of file diffs into TOON.
70pub fn encode_diffs(diffs: &[FileDiff]) -> Result<String> {
71    encode_value(&diffs)
72}
73
74/// Encode an array of comments into TOON.
75pub fn encode_comments(comments: &[Comment]) -> Result<String> {
76    encode_value(&comments)
77}
78
79/// Encode an array of discussions into TOON.
80pub fn encode_discussions(discussions: &[Discussion]) -> Result<String> {
81    encode_value(&discussions)
82}
83
// ============================================================================
// View structs for the Standard and Minimal detail levels
// ============================================================================
87
88/// Issue at Standard level -- without timestamps and avatar.
89#[derive(Serialize)]
90struct IssueStandard<'a> {
91    key: &'a str,
92    title: &'a str,
93    state: &'a str,
94    source: &'a str,
95    #[serde(skip_serializing_if = "Option::is_none")]
96    priority: Option<&'a str>,
97    #[serde(skip_serializing_if = "Vec::is_empty")]
98    labels: &'a Vec<String>,
99    #[serde(skip_serializing_if = "Option::is_none")]
100    description: Option<&'a str>,
101    #[serde(skip_serializing_if = "Option::is_none")]
102    author: Option<&'a str>,
103    #[serde(skip_serializing_if = "Option::is_none")]
104    url: Option<&'a str>,
105}
106
107impl<'a> From<&'a Issue> for IssueStandard<'a> {
108    fn from(i: &'a Issue) -> Self {
109        Self {
110            key: &i.key,
111            title: &i.title,
112            state: &i.state,
113            source: &i.source,
114            priority: i.priority.as_deref(),
115            labels: &i.labels,
116            description: i.description.as_deref(),
117            author: i.author.as_ref().map(|u| u.username.as_str()),
118            url: i.url.as_deref(),
119        }
120    }
121}
122
123/// Issue at Minimal level -- only key, title, state.
124#[derive(Serialize)]
125struct IssueMinimal<'a> {
126    key: &'a str,
127    title: &'a str,
128    state: &'a str,
129}
130
131impl<'a> From<&'a Issue> for IssueMinimal<'a> {
132    fn from(i: &'a Issue) -> Self {
133        Self {
134            key: &i.key,
135            title: &i.title,
136            state: &i.state,
137        }
138    }
139}
140
141/// MergeRequest at Standard level.
142#[derive(Serialize)]
143struct MrStandard<'a> {
144    key: &'a str,
145    title: &'a str,
146    state: &'a str,
147    source: &'a str,
148    source_branch: &'a str,
149    target_branch: &'a str,
150    draft: bool,
151    #[serde(skip_serializing_if = "Vec::is_empty")]
152    labels: &'a Vec<String>,
153    #[serde(skip_serializing_if = "Option::is_none")]
154    description: Option<&'a str>,
155    #[serde(skip_serializing_if = "Option::is_none")]
156    author: Option<&'a str>,
157    #[serde(skip_serializing_if = "Option::is_none")]
158    url: Option<&'a str>,
159}
160
161impl<'a> From<&'a MergeRequest> for MrStandard<'a> {
162    fn from(mr: &'a MergeRequest) -> Self {
163        Self {
164            key: &mr.key,
165            title: &mr.title,
166            state: &mr.state,
167            source: &mr.source,
168            source_branch: &mr.source_branch,
169            target_branch: &mr.target_branch,
170            draft: mr.draft,
171            labels: &mr.labels,
172            description: mr.description.as_deref(),
173            author: mr.author.as_ref().map(|u| u.username.as_str()),
174            url: mr.url.as_deref(),
175        }
176    }
177}
178
179/// MergeRequest at Minimal level.
180#[derive(Serialize)]
181struct MrMinimal<'a> {
182    key: &'a str,
183    title: &'a str,
184    state: &'a str,
185    source_branch: &'a str,
186    target_branch: &'a str,
187}
188
189impl<'a> From<&'a MergeRequest> for MrMinimal<'a> {
190    fn from(mr: &'a MergeRequest) -> Self {
191        Self {
192            key: &mr.key,
193            title: &mr.title,
194            state: &mr.state,
195            source_branch: &mr.source_branch,
196            target_branch: &mr.target_branch,
197        }
198    }
199}
200
#[cfg(test)]
mod tests {
    use super::*;
    use devboy_core::User;

    /// Baseline fixture: a fully populated GitHub issue authored by `alice`.
    fn sample_issue() -> Issue {
        let author = User {
            id: "1".into(),
            username: "alice".into(),
            name: Some("Alice Smith".into()),
            email: None,
            avatar_url: None,
        };

        Issue {
            key: "gh#1".into(),
            title: "Fix login bug".into(),
            description: Some("Users cannot login with SSO".into()),
            state: "open".into(),
            source: "github".into(),
            priority: Some("high".into()),
            labels: vec!["bug".into(), "auth".into()],
            author: Some(author),
            assignees: vec![],
            url: Some("https://github.com/test/repo/issues/1".into()),
            created_at: Some("2024-01-01T00:00:00Z".into()),
            updated_at: Some("2024-01-02T00:00:00Z".into()),
            attachments_count: None,
            parent: None,
            subtasks: vec![],
            custom_fields: std::collections::HashMap::new(),
        }
    }

    /// Baseline fixture: an open, non-draft GitHub pull request authored by `bob`.
    fn sample_mr() -> MergeRequest {
        let author = User {
            id: "2".into(),
            username: "bob".into(),
            name: None,
            email: None,
            avatar_url: None,
        };

        MergeRequest {
            key: "pr#42".into(),
            title: "Add SSO support".into(),
            description: Some("Implements SAML-based SSO".into()),
            state: "open".into(),
            source: "github".into(),
            source_branch: "feat/sso".into(),
            target_branch: "main".into(),
            author: Some(author),
            assignees: vec![],
            reviewers: vec![],
            labels: vec!["feature".into()],
            draft: false,
            url: Some("https://github.com/test/repo/pull/42".into()),
            created_at: Some("2024-01-01T00:00:00Z".into()),
            updated_at: Some("2024-01-02T00:00:00Z".into()),
        }
    }

    #[test]
    fn test_encode_issues_full() {
        let toon = encode_issues(&[sample_issue()], TrimLevel::Full).unwrap();
        assert!(toon.contains("gh#1"));
        assert!(toon.contains("Fix login bug"));
        // Full keeps the timestamps.
        assert!(toon.contains("2024-01-01"));
    }

    #[test]
    fn test_encode_issues_standard() {
        let toon = encode_issues(&[sample_issue()], TrimLevel::Standard).unwrap();
        assert!(toon.contains("gh#1"));
        assert!(toon.contains("Fix login bug"));
        // Standard drops timestamps and anything avatar-related.
        assert!(!toon.contains("2024-01-01"));
        assert!(!toon.contains("avatar"));
    }

    #[test]
    fn test_encode_issues_minimal() {
        let toon = encode_issues(&[sample_issue()], TrimLevel::Minimal).unwrap();
        assert!(toon.contains("gh#1"));
        assert!(toon.contains("Fix login bug"));
        assert!(toon.contains("open"));
        // Minimal drops the source and the author.
        assert!(!toon.contains("github"));
        assert!(!toon.contains("alice"));
    }

    #[test]
    fn test_encode_merge_requests_full() {
        let toon = encode_merge_requests(&[sample_mr()], TrimLevel::Full).unwrap();
        assert!(toon.contains("pr#42"));
        assert!(toon.contains("Add SSO support"));
    }

    #[test]
    fn test_encode_merge_requests_standard() {
        let toon = encode_merge_requests(&[sample_mr()], TrimLevel::Standard).unwrap();
        assert!(toon.contains("pr#42"));
        assert!(toon.contains("Add SSO support"));
        assert!(toon.contains("feat/sso"));
        // Standard drops timestamps.
        assert!(!toon.contains("2024-01-01"));
    }

    #[test]
    fn test_encode_merge_requests_minimal() {
        let toon = encode_merge_requests(&[sample_mr()], TrimLevel::Minimal).unwrap();
        assert!(toon.contains("pr#42"));
        assert!(toon.contains("feat/sso"));
        assert!(!toon.contains("bob"));
    }

    #[test]
    fn test_encode_diffs() {
        let diff = FileDiff {
            file_path: "src/main.rs".into(),
            old_path: None,
            new_file: false,
            deleted_file: false,
            renamed_file: false,
            diff: "+added line\n-removed line".into(),
            additions: Some(1),
            deletions: Some(1),
        };

        let toon = encode_diffs(&[diff]).unwrap();
        assert!(toon.contains("src/main.rs"));
        assert!(toon.contains("added line"));
    }

    #[test]
    fn test_encode_comments() {
        let comment = Comment {
            id: "c1".into(),
            body: "LGTM!".into(),
            author: None,
            created_at: None,
            updated_at: None,
            position: None,
        };

        let toon = encode_comments(&[comment]).unwrap();
        assert!(toon.contains("LGTM!"));
    }

    #[test]
    fn test_encode_discussions() {
        let comment = Comment {
            id: "c1".into(),
            body: "Needs review".into(),
            author: None,
            created_at: None,
            updated_at: None,
            position: None,
        };
        let discussion = Discussion {
            id: "d1".into(),
            resolved: false,
            resolved_by: None,
            comments: vec![comment],
            position: None,
        };

        let toon = encode_discussions(&[discussion]).unwrap();
        assert!(toon.contains("Needs review"));
    }

    #[test]
    fn test_toon_smaller_than_json() {
        // Ten issues that differ from the baseline fixture only in the fields
        // spelled out here; state, source, timestamps and the empty
        // collections are inherited from `sample_issue()`.
        let issues: Vec<Issue> = (1..=10)
            .map(|i| Issue {
                key: format!("gh#{i}"),
                title: format!("Issue {i}"),
                description: Some(format!("Description for issue {i}")),
                priority: None,
                labels: vec!["bug".into()],
                author: Some(User {
                    id: format!("{i}"),
                    username: format!("user{i}"),
                    name: None,
                    email: None,
                    avatar_url: None,
                }),
                url: Some(format!("https://github.com/test/repo/issues/{i}")),
                ..sample_issue()
            })
            .collect();

        let toon = encode_issues(&issues, TrimLevel::Full).unwrap();
        let json = serde_json::to_string_pretty(&issues).unwrap();

        // The TOON rendering must come out shorter than pretty-printed JSON.
        assert!(
            toon.len() < json.len(),
            "TOON ({}) should be smaller than JSON ({})",
            toon.len(),
            json.len()
        );
    }

    #[test]
    fn test_minimal_much_smaller_than_full() {
        // Five verbose issues (long description, fully populated author);
        // everything not spelled out here is inherited from `sample_issue()`.
        let issues: Vec<Issue> = (1..=5)
            .map(|i| Issue {
                key: format!("gh#{i}"),
                title: format!("Issue {i}"),
                description: Some(
                    "A long description that takes many tokens and should be excluded in minimal mode"
                        .into(),
                ),
                labels: vec!["bug".into(), "urgent".into()],
                author: Some(User {
                    id: format!("{i}"),
                    username: format!("user{i}"),
                    name: Some(format!("User {i}")),
                    email: Some(format!("user{i}@example.com")),
                    avatar_url: Some("https://example.com/avatar.png".into()),
                }),
                url: Some(format!("https://github.com/test/repo/issues/{i}")),
                ..sample_issue()
            })
            .collect();

        let minimal = encode_issues(&issues, TrimLevel::Minimal).unwrap();
        let full = encode_issues(&issues, TrimLevel::Full).unwrap();

        // Minimal has to be under a third of the Full size.
        assert!(
            minimal.len() * 3 < full.len(),
            "Minimal ({}) should be at least 3x smaller than Full ({})",
            minimal.len(),
            full.len()
        );
    }
}