Skip to main content

devboy_format_pipeline/
toon.rs

1//! TOON (Token-Oriented Object Notation) encoding for tool output.
2//!
3//! Wrappers around `toon_format::encode` with optimal settings for LLM.
4//! Supports three detail levels (TrimLevel) for controlling
5//! the number of fields during budget trimming.
6
7use devboy_core::{Comment, Discussion, FileDiff, Issue, MergeRequest, Result};
8use serde::Serialize;
9use toon_format::EncodeOptions;
10use toon_format::types::KeyFoldingMode;
11
/// How much of each record is kept when encoding to TOON.
///
/// The budget pipeline steps through these levels to progressively shed
/// fields when the output has to shrink. The token figures on the variants
/// are rough per-issue estimates.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TrimLevel {
    /// Every field is emitted (~750 tokens/issue).
    Full,
    /// Core fields only; timestamps and `avatar_url` are dropped
    /// (~400 tokens/issue).
    Standard,
    /// Just the identifying fields: key, title, state (~150 tokens/issue).
    Minimal,
}
25
26/// Optimal TOON settings for LLM: minimal indent, key folding.
27fn default_opts() -> EncodeOptions {
28    EncodeOptions::new()
29        .with_spaces(1)
30        .with_key_folding(KeyFoldingMode::Safe)
31}
32
33/// Encode any Serialize type into TOON.
34pub fn encode_value<T: Serialize>(value: &T) -> Result<String> {
35    toon_format::encode(value, &default_opts())
36        .map_err(|e| devboy_core::Error::Other(anyhow::anyhow!("TOON encode: {e}")))
37}
38
39/// Encode an array of issues into TOON with the specified detail level.
40pub fn encode_issues(issues: &[Issue], level: TrimLevel) -> Result<String> {
41    match level {
42        TrimLevel::Full => encode_value(&issues),
43        TrimLevel::Standard => {
44            let views: Vec<IssueStandard> = issues.iter().map(IssueStandard::from).collect();
45            encode_value(&views)
46        }
47        TrimLevel::Minimal => {
48            let views: Vec<IssueMinimal> = issues.iter().map(IssueMinimal::from).collect();
49            encode_value(&views)
50        }
51    }
52}
53
54/// Encode an array of merge requests into TOON with the specified detail level.
55pub fn encode_merge_requests(mrs: &[MergeRequest], level: TrimLevel) -> Result<String> {
56    match level {
57        TrimLevel::Full => encode_value(&mrs),
58        TrimLevel::Standard => {
59            let views: Vec<MrStandard> = mrs.iter().map(MrStandard::from).collect();
60            encode_value(&views)
61        }
62        TrimLevel::Minimal => {
63            let views: Vec<MrMinimal> = mrs.iter().map(MrMinimal::from).collect();
64            encode_value(&views)
65        }
66    }
67}
68
69/// Encode an array of file diffs into TOON.
70pub fn encode_diffs(diffs: &[FileDiff]) -> Result<String> {
71    encode_value(&diffs)
72}
73
74/// Encode an array of comments into TOON.
75pub fn encode_comments(comments: &[Comment]) -> Result<String> {
76    encode_value(&comments)
77}
78
79/// Encode an array of discussions into TOON.
80pub fn encode_discussions(discussions: &[Discussion]) -> Result<String> {
81    encode_value(&discussions)
82}
83
84// ============================================================================
85// View structs for Standard and Minimal levels
86// ============================================================================
87
88/// Issue at Standard level -- without timestamps and avatar.
89#[derive(Serialize)]
90struct IssueStandard<'a> {
91    key: &'a str,
92    title: &'a str,
93    state: &'a str,
94    // DEV-1578: rich display status + category, alongside the binary state
95    #[serde(skip_serializing_if = "Option::is_none")]
96    status: Option<&'a str>,
97    #[serde(skip_serializing_if = "Option::is_none")]
98    status_category: Option<&'a str>,
99    source: &'a str,
100    #[serde(skip_serializing_if = "Option::is_none")]
101    priority: Option<&'a str>,
102    #[serde(skip_serializing_if = "Vec::is_empty")]
103    labels: &'a Vec<String>,
104    #[serde(skip_serializing_if = "Option::is_none")]
105    description: Option<&'a str>,
106    #[serde(skip_serializing_if = "Option::is_none")]
107    author: Option<&'a str>,
108    #[serde(skip_serializing_if = "Option::is_none")]
109    url: Option<&'a str>,
110}
111
112impl<'a> From<&'a Issue> for IssueStandard<'a> {
113    fn from(i: &'a Issue) -> Self {
114        Self {
115            key: &i.key,
116            title: &i.title,
117            state: &i.state,
118            status: i.status.as_deref(),
119            status_category: i.status_category.as_deref(),
120            source: &i.source,
121            priority: i.priority.as_deref(),
122            labels: &i.labels,
123            description: i.description.as_deref(),
124            author: i.author.as_ref().map(|u| u.username.as_str()),
125            url: i.url.as_deref(),
126        }
127    }
128}
129
130/// Issue at Minimal level -- only key, title, state.
131#[derive(Serialize)]
132struct IssueMinimal<'a> {
133    key: &'a str,
134    title: &'a str,
135    state: &'a str,
136}
137
138impl<'a> From<&'a Issue> for IssueMinimal<'a> {
139    fn from(i: &'a Issue) -> Self {
140        Self {
141            key: &i.key,
142            title: &i.title,
143            state: &i.state,
144        }
145    }
146}
147
148/// MergeRequest at Standard level.
149#[derive(Serialize)]
150struct MrStandard<'a> {
151    key: &'a str,
152    title: &'a str,
153    state: &'a str,
154    source: &'a str,
155    source_branch: &'a str,
156    target_branch: &'a str,
157    draft: bool,
158    #[serde(skip_serializing_if = "Vec::is_empty")]
159    labels: &'a Vec<String>,
160    #[serde(skip_serializing_if = "Option::is_none")]
161    description: Option<&'a str>,
162    #[serde(skip_serializing_if = "Option::is_none")]
163    author: Option<&'a str>,
164    #[serde(skip_serializing_if = "Option::is_none")]
165    url: Option<&'a str>,
166}
167
168impl<'a> From<&'a MergeRequest> for MrStandard<'a> {
169    fn from(mr: &'a MergeRequest) -> Self {
170        Self {
171            key: &mr.key,
172            title: &mr.title,
173            state: &mr.state,
174            source: &mr.source,
175            source_branch: &mr.source_branch,
176            target_branch: &mr.target_branch,
177            draft: mr.draft,
178            labels: &mr.labels,
179            description: mr.description.as_deref(),
180            author: mr.author.as_ref().map(|u| u.username.as_str()),
181            url: mr.url.as_deref(),
182        }
183    }
184}
185
186/// MergeRequest at Minimal level.
187#[derive(Serialize)]
188struct MrMinimal<'a> {
189    key: &'a str,
190    title: &'a str,
191    state: &'a str,
192    source_branch: &'a str,
193    target_branch: &'a str,
194}
195
196impl<'a> From<&'a MergeRequest> for MrMinimal<'a> {
197    fn from(mr: &'a MergeRequest) -> Self {
198        Self {
199            key: &mr.key,
200            title: &mr.title,
201            state: &mr.state,
202            source_branch: &mr.source_branch,
203            target_branch: &mr.target_branch,
204        }
205    }
206}
207
#[cfg(test)]
mod tests {
    //! Unit tests for the TOON encoding helpers.
    //!
    //! Most assertions are substring checks on the encoded text, so they pin
    //! which values appear at each [`TrimLevel`] rather than the exact TOON
    //! layout.

    use super::*;
    use devboy_core::User;

    /// A fully populated GitHub-style issue shared by the issue tests.
    fn sample_issue() -> Issue {
        Issue {
            key: "gh#1".into(),
            title: "Fix login bug".into(),
            description: Some("Users cannot login with SSO".into()),
            state: "open".into(),
            source: "github".into(),
            priority: Some("high".into()),
            labels: vec!["bug".into(), "auth".into()],
            author: Some(User {
                id: "1".into(),
                username: "alice".into(),
                name: Some("Alice Smith".into()),
                email: None,
                avatar_url: None,
            }),
            assignees: vec![],
            url: Some("https://github.com/test/repo/issues/1".into()),
            created_at: Some("2024-01-01T00:00:00Z".into()),
            updated_at: Some("2024-01-02T00:00:00Z".into()),
            attachments_count: None,
            parent: None,
            subtasks: vec![],
            custom_fields: std::collections::HashMap::new(),
            ..Default::default()
        }
    }

    /// A fully populated GitHub-style pull request shared by the MR tests.
    fn sample_mr() -> MergeRequest {
        MergeRequest {
            key: "pr#42".into(),
            title: "Add SSO support".into(),
            description: Some("Implements SAML-based SSO".into()),
            state: "open".into(),
            source: "github".into(),
            source_branch: "feat/sso".into(),
            target_branch: "main".into(),
            author: Some(User {
                id: "2".into(),
                username: "bob".into(),
                name: None,
                email: None,
                avatar_url: None,
            }),
            assignees: vec![],
            reviewers: vec![],
            labels: vec!["feature".into()],
            draft: false,
            url: Some("https://github.com/test/repo/pull/42".into()),
            created_at: Some("2024-01-01T00:00:00Z".into()),
            updated_at: Some("2024-01-02T00:00:00Z".into()),
        }
    }

    #[test]
    fn test_encode_issues_full() {
        let issues = vec![sample_issue()];
        let result = encode_issues(&issues, TrimLevel::Full).unwrap();
        assert!(result.contains("gh#1"));
        assert!(result.contains("Fix login bug"));
        assert!(result.contains("2024-01-01")); // timestamps present in Full
    }

    #[test]
    fn test_encode_issues_standard() {
        let issues = vec![sample_issue()];
        let result = encode_issues(&issues, TrimLevel::Standard).unwrap();
        assert!(result.contains("gh#1"));
        assert!(result.contains("Fix login bug"));
        assert!(!result.contains("2024-01-01")); // no timestamps
        assert!(!result.contains("avatar")); // no avatar
    }

    // DEV-1578: the rich display status + category must surface in the
    // Standard TOON view (the level get_issues defaults to), but stay out
    // of the ultra-compact Minimal view.
    #[test]
    fn test_encode_issues_standard_includes_display_status() {
        let mut issue = sample_issue();
        issue.status = Some("ready to release".into());
        issue.status_category = Some("in_progress".into());

        let standard = encode_issues(&[issue.clone()], TrimLevel::Standard).unwrap();
        assert!(standard.contains("ready to release"));
        assert!(standard.contains("in_progress"));

        // Both the status and its category have to be absent from Minimal;
        // checking only one of them would let the other leak unnoticed.
        let minimal = encode_issues(&[issue], TrimLevel::Minimal).unwrap();
        assert!(!minimal.contains("ready to release"));
        assert!(!minimal.contains("in_progress"));
    }

    #[test]
    fn test_encode_issues_minimal() {
        let issues = vec![sample_issue()];
        let result = encode_issues(&issues, TrimLevel::Minimal).unwrap();
        assert!(result.contains("gh#1"));
        assert!(result.contains("Fix login bug"));
        assert!(result.contains("open"));
        assert!(!result.contains("github")); // no source
        assert!(!result.contains("alice")); // no author
    }

    #[test]
    fn test_encode_merge_requests_full() {
        let mrs = vec![sample_mr()];
        let result = encode_merge_requests(&mrs, TrimLevel::Full).unwrap();
        assert!(result.contains("pr#42"));
        assert!(result.contains("Add SSO support"));
    }

    #[test]
    fn test_encode_merge_requests_standard() {
        let mrs = vec![sample_mr()];
        let result = encode_merge_requests(&mrs, TrimLevel::Standard).unwrap();
        assert!(result.contains("pr#42"));
        assert!(result.contains("Add SSO support"));
        assert!(result.contains("feat/sso"));
        assert!(!result.contains("2024-01-01")); // no timestamps
    }

    #[test]
    fn test_encode_merge_requests_minimal() {
        let mrs = vec![sample_mr()];
        let result = encode_merge_requests(&mrs, TrimLevel::Minimal).unwrap();
        assert!(result.contains("pr#42"));
        assert!(result.contains("feat/sso"));
        assert!(!result.contains("bob")); // no author
    }

    #[test]
    fn test_encode_diffs() {
        let diffs = vec![FileDiff {
            file_path: "src/main.rs".into(),
            old_path: None,
            new_file: false,
            deleted_file: false,
            renamed_file: false,
            diff: "+added line\n-removed line".into(),
            additions: Some(1),
            deletions: Some(1),
        }];
        let result = encode_diffs(&diffs).unwrap();
        assert!(result.contains("src/main.rs"));
        assert!(result.contains("added line"));
    }

    #[test]
    fn test_encode_comments() {
        let comments = vec![Comment {
            id: "c1".into(),
            body: "LGTM!".into(),
            author: None,
            created_at: None,
            updated_at: None,
            position: None,
        }];
        let result = encode_comments(&comments).unwrap();
        assert!(result.contains("LGTM!"));
    }

    #[test]
    fn test_encode_discussions() {
        let discussions = vec![Discussion {
            id: "d1".into(),
            resolved: false,
            resolved_by: None,
            comments: vec![Comment {
                id: "c1".into(),
                body: "Needs review".into(),
                author: None,
                created_at: None,
                updated_at: None,
                position: None,
            }],
            position: None,
        }];
        let result = encode_discussions(&discussions).unwrap();
        assert!(result.contains("Needs review"));
    }

    #[test]
    fn test_toon_smaller_than_json() {
        let issues: Vec<Issue> = (1..=10)
            .map(|i| Issue {
                key: format!("gh#{i}"),
                title: format!("Issue {i}"),
                description: Some(format!("Description for issue {i}")),
                state: "open".into(),
                source: "github".into(),
                priority: None,
                labels: vec!["bug".into()],
                author: Some(User {
                    id: format!("{i}"),
                    username: format!("user{i}"),
                    name: None,
                    email: None,
                    avatar_url: None,
                }),
                assignees: vec![],
                url: Some(format!("https://github.com/test/repo/issues/{i}")),
                created_at: Some("2024-01-01T00:00:00Z".into()),
                updated_at: Some("2024-01-02T00:00:00Z".into()),
                attachments_count: None,
                parent: None,
                subtasks: vec![],
                custom_fields: std::collections::HashMap::new(),
                ..Default::default()
            })
            .collect();

        let json = serde_json::to_string_pretty(&issues).unwrap();
        let toon = encode_issues(&issues, TrimLevel::Full).unwrap();

        // TOON should be more compact than JSON
        assert!(
            toon.len() < json.len(),
            "TOON ({}) should be smaller than JSON ({})",
            toon.len(),
            json.len()
        );
    }

    #[test]
    fn test_minimal_much_smaller_than_full() {
        // Hoisted so the struct literal below stays within the line-width
        // limit and rustfmt can format the whole statement.
        const LONG_DESCRIPTION: &str =
            "A long description that takes many tokens and should be excluded in minimal mode";

        let issues: Vec<Issue> = (1..=5)
            .map(|i| Issue {
                key: format!("gh#{i}"),
                title: format!("Issue {i}"),
                description: Some(LONG_DESCRIPTION.into()),
                state: "open".into(),
                source: "github".into(),
                priority: Some("high".into()),
                labels: vec!["bug".into(), "urgent".into()],
                author: Some(User {
                    id: format!("{i}"),
                    username: format!("user{i}"),
                    name: Some(format!("User {i}")),
                    email: Some(format!("user{i}@example.com")),
                    avatar_url: Some("https://example.com/avatar.png".into()),
                }),
                assignees: vec![],
                url: Some(format!("https://github.com/test/repo/issues/{i}")),
                created_at: Some("2024-01-01T00:00:00Z".into()),
                updated_at: Some("2024-01-02T00:00:00Z".into()),
                attachments_count: None,
                parent: None,
                subtasks: vec![],
                custom_fields: std::collections::HashMap::new(),
                ..Default::default()
            })
            .collect();

        let full = encode_issues(&issues, TrimLevel::Full).unwrap();
        let minimal = encode_issues(&issues, TrimLevel::Minimal).unwrap();

        // Minimal should be significantly smaller than Full
        assert!(
            minimal.len() * 3 < full.len(),
            "Minimal ({}) should be at least 3x smaller than Full ({})",
            minimal.len(),
            full.len()
        );
    }
}