1#![deny(rustdoc::broken_intra_doc_links)]
26#![deny(rustdoc::private_intra_doc_links)]
27#![deny(rustdoc::invalid_html_tags)]
28pub mod adaptive_config;
29pub mod budget;
30pub mod dedup;
31pub(crate) mod dedup_util;
32pub mod enrichment;
33pub mod layered_pipeline;
34pub mod mckp_router;
35pub mod near_ref;
36pub mod page_index;
37pub mod pagination;
38pub mod projection;
39pub mod round_trip;
40pub mod shape;
41pub mod strategy;
42pub mod telemetry;
43pub mod templates;
44pub mod token_counter;
45pub mod tool_defaults;
46pub mod toon;
47pub mod tree;
48pub mod trim;
49pub mod truncation;
50
51pub use token_counter::{Tokenizer, estimate_tokens, tokens_to_chars};
52pub use truncation::TruncationPlugin;
53
54use devboy_core::{Comment, Discussion, FileDiff, Issue, MergeRequest, Result};
55
56use budget::BudgetConfig;
57use strategy::StrategyResolver;
58
/// Converts a character budget into an approximate token budget.
///
/// Uses a fixed ratio of 3.5 characters per token and rounds the quotient up,
/// so any non-zero character count maps to at least one token.
fn estimate_tokens_from_chars(chars: usize) -> usize {
    const CHARS_PER_TOKEN: f64 = 3.5;

    let fractional_tokens = chars as f64 / CHARS_PER_TOKEN;
    fractional_tokens.ceil() as usize
}
63
64fn encode_mckp<T: serde::Serialize>(items: &[T]) -> Result<String> {
69 let json = serde_json::to_string_pretty(items)?;
70 let cls = shape::classify(&json);
71 let cfg = adaptive_config::MckpConfig::default();
72 if let Some((_id, body)) = mckp_router::route(&cfg, &json, &cls) {
73 Ok(body)
74 } else {
75 Ok(json)
76 }
77}
78
79#[derive(Debug, Clone)]
83pub struct TransformOutput {
84 pub content: String,
86 pub truncated: bool,
88 pub total_count: Option<usize>,
90 pub included_count: usize,
92 pub agent_hint: Option<String>,
94 pub page_cursor: Option<String>,
96 pub page_index: Option<page_index::PageIndex>,
98 pub provider_pagination: Option<devboy_core::Pagination>,
100 pub provider_sort: Option<devboy_core::SortInfo>,
102 pub raw_chars: usize,
104 pub output_chars: usize,
106 pub pre_trim_chars: usize,
109}
110
111impl TransformOutput {
112 pub fn new(content: String) -> Self {
114 let output_chars = content.len();
115 Self {
116 content,
117 truncated: false,
118 total_count: None,
119 included_count: 0,
120 agent_hint: None,
121 page_cursor: None,
122 page_index: None,
123 provider_pagination: None,
124 provider_sort: None,
125 raw_chars: 0,
126 output_chars,
127 pre_trim_chars: 0,
128 }
129 }
130
131 pub fn with_raw_chars(mut self, raw_chars: usize) -> Self {
133 self.raw_chars = raw_chars;
134 self
135 }
136
137 pub fn with_truncation(mut self, total: usize, included: usize, hint: String) -> Self {
139 self.truncated = true;
140 self.total_count = Some(total);
141 self.included_count = included;
142 self.agent_hint = Some(hint);
143 self
144 }
145
146 pub fn to_string_with_hints(&self) -> String {
148 let mut parts = Vec::new();
149
150 if let Some(index) = &self.page_index {
152 parts.push(index.to_toon());
153 }
154
155 parts.push(self.content.clone());
157
158 if let Some(hint) = &self.agent_hint {
160 parts.push(hint.clone());
161 }
162
163 parts.join("\n\n")
164 }
165}
166
167#[derive(Debug, Clone)]
169pub struct PipelineConfig {
170 pub max_chars: usize,
173 pub max_chars_per_item: usize,
175 pub max_description_len: usize,
177 pub format: OutputFormat,
178 pub include_hints: bool,
180 pub page_cursor: Option<String>,
182 pub tool_name: Option<String>,
184 pub chunk: Option<usize>,
187}
188
189impl Default for PipelineConfig {
190 fn default() -> Self {
191 Self {
192 max_chars: 100_000,
193 max_chars_per_item: 10_000,
194 max_description_len: 10_000,
195 format: OutputFormat::Toon,
196 include_hints: true,
197 page_cursor: None,
198 tool_name: None,
199 chunk: None,
200 }
201 }
202}
203
/// Encoding applied to pipeline output.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum OutputFormat {
    /// Encode with the functions of the `toon` module.
    Toon,
    /// Encode as pretty-printed JSON.
    Json,
    /// Encode through the MCKP router, which falls back to pretty-printed JSON.
    Mckp,
}
221
222pub struct Pipeline {
224 config: PipelineConfig,
225}
226
227impl Pipeline {
228 pub fn new() -> Self {
230 Self {
231 config: PipelineConfig::default(),
232 }
233 }
234
235 pub fn with_config(config: PipelineConfig) -> Self {
237 Self { config }
238 }
239
240 pub fn transform_issues(&self, issues: Vec<Issue>) -> Result<TransformOutput> {
242 let total = issues.len();
243 let raw_json = serde_json::to_string(&issues)?;
244 let raw_chars = raw_json.len();
245
246 let full_content = match self.config.format {
248 OutputFormat::Json => serde_json::to_string_pretty(&issues)?,
249 OutputFormat::Toon => toon::encode_issues(&issues, toon::TrimLevel::Full)?,
250 OutputFormat::Mckp => encode_mckp(&issues)?,
251 };
252
253 if self.config.max_chars == 0 || full_content.len() <= self.config.max_chars {
254 let mut output = TransformOutput::new(full_content).with_raw_chars(raw_chars);
255 output.included_count = total;
256 return Ok(output);
257 }
258
259 let budget_config = self.budget_config();
261 let strategy_kind = self.resolve_strategy("get_issues");
262 let result = budget::process_issues(&issues, strategy_kind, &budget_config)?;
263 let chunk_size = result.included_items;
264
265 let (chunk_items, is_chunk_request) = self.slice_for_chunk(&issues, chunk_size);
267 if is_chunk_request {
268 let content = match self.config.format {
269 OutputFormat::Json => serde_json::to_string_pretty(chunk_items)?,
270 OutputFormat::Toon => toon::encode_issues(chunk_items, toon::TrimLevel::Full)?,
271 OutputFormat::Mckp => encode_mckp(chunk_items)?,
272 };
273 let mut output = TransformOutput::new(content).with_raw_chars(raw_chars);
274 output.included_count = chunk_items.len();
275 output.total_count = Some(total);
276 return Ok(output);
277 }
278
279 let json_fallback = self.json_fallback(&full_content);
281 let index = page_index::build_issues_index(&issues, result.included_items);
282 self.build_budget_output(
283 result,
284 raw_chars,
285 total,
286 "issues",
287 Some(index),
288 json_fallback,
289 )
290 }
291
292 pub fn transform_merge_requests(&self, mrs: Vec<MergeRequest>) -> Result<TransformOutput> {
294 let total = mrs.len();
295 let raw_json = serde_json::to_string(&mrs)?;
296 let raw_chars = raw_json.len();
297
298 let full_content = match self.config.format {
299 OutputFormat::Json => serde_json::to_string_pretty(&mrs)?,
300 OutputFormat::Toon => toon::encode_merge_requests(&mrs, toon::TrimLevel::Full)?,
301 OutputFormat::Mckp => encode_mckp(&mrs)?,
302 };
303
304 if self.config.max_chars == 0 || full_content.len() <= self.config.max_chars {
305 let mut output = TransformOutput::new(full_content).with_raw_chars(raw_chars);
306 output.included_count = total;
307 return Ok(output);
308 }
309
310 let budget_config = self.budget_config();
311 let strategy_kind = self.resolve_strategy("get_merge_requests");
312 let result = budget::process_merge_requests(&mrs, strategy_kind, &budget_config)?;
313 let chunk_size = result.included_items;
314
315 let (chunk_items, is_chunk_request) = self.slice_for_chunk(&mrs, chunk_size);
316 if is_chunk_request {
317 let content = match self.config.format {
318 OutputFormat::Json => serde_json::to_string_pretty(chunk_items)?,
319 OutputFormat::Toon => {
320 toon::encode_merge_requests(chunk_items, toon::TrimLevel::Full)?
321 }
322 OutputFormat::Mckp => encode_mckp(chunk_items)?,
323 };
324 let mut output = TransformOutput::new(content).with_raw_chars(raw_chars);
325 output.included_count = chunk_items.len();
326 output.total_count = Some(total);
327 return Ok(output);
328 }
329
330 let json_fallback = self.json_fallback(&full_content);
331 let index = page_index::build_merge_requests_index(&mrs, result.included_items);
332 self.build_budget_output(
333 result,
334 raw_chars,
335 total,
336 "merge_requests",
337 Some(index),
338 json_fallback,
339 )
340 }
341
342 pub fn transform_diffs(&self, diffs: Vec<FileDiff>) -> Result<TransformOutput> {
347 let total = diffs.len();
348
349 let diffs: Vec<FileDiff> = diffs
351 .into_iter()
352 .map(|mut d| {
353 d.diff = truncation::truncate_string(&d.diff, self.config.max_chars_per_item);
354 d
355 })
356 .collect();
357
358 let raw_json = serde_json::to_string(&diffs)?;
359 let raw_chars = raw_json.len();
360
361 let full_content = match self.config.format {
362 OutputFormat::Json => serde_json::to_string_pretty(&diffs)?,
363 OutputFormat::Toon => toon::encode_diffs(&diffs)?,
364 OutputFormat::Mckp => encode_mckp(&diffs)?,
365 };
366
367 if self.config.max_chars == 0 || full_content.len() <= self.config.max_chars {
368 let mut output = TransformOutput::new(full_content).with_raw_chars(raw_chars);
369 output.included_count = total;
370 return Ok(output);
371 }
372
373 let budget_config = self.budget_config();
374 let strategy_kind = self.resolve_strategy("get_merge_request_diffs");
375 let result = budget::process_diffs(&diffs, strategy_kind, &budget_config)?;
376 let chunk_size = result.included_items;
377
378 let (chunk_items, is_chunk_request) = self.slice_for_chunk(&diffs, chunk_size);
379 if is_chunk_request {
380 let content = match self.config.format {
381 OutputFormat::Json => serde_json::to_string_pretty(chunk_items)?,
382 OutputFormat::Toon => toon::encode_diffs(chunk_items)?,
383 OutputFormat::Mckp => encode_mckp(chunk_items)?,
384 };
385 let mut output = TransformOutput::new(content).with_raw_chars(raw_chars);
386 output.included_count = chunk_items.len();
387 output.total_count = Some(total);
388 return Ok(output);
389 }
390
391 let json_fallback = self.json_fallback(&full_content);
392 let index = page_index::build_diffs_index(&diffs, result.included_items);
393 self.build_budget_output(
394 result,
395 raw_chars,
396 total,
397 "diffs",
398 Some(index),
399 json_fallback,
400 )
401 }
402
403 pub fn transform_comments(&self, comments: Vec<Comment>) -> Result<TransformOutput> {
405 let total = comments.len();
406 let raw_json = serde_json::to_string(&comments)?;
407 let raw_chars = raw_json.len();
408
409 let full_content = match self.config.format {
410 OutputFormat::Json => serde_json::to_string_pretty(&comments)?,
411 OutputFormat::Toon => toon::encode_comments(&comments)?,
412 OutputFormat::Mckp => encode_mckp(&comments)?,
413 };
414
415 if self.config.max_chars == 0 || full_content.len() <= self.config.max_chars {
416 let mut output = TransformOutput::new(full_content).with_raw_chars(raw_chars);
417 output.included_count = total;
418 return Ok(output);
419 }
420
421 let budget_config = self.budget_config();
422 let strategy_kind = self.resolve_strategy("get_issue_comments");
423 let result = budget::process_comments(&comments, strategy_kind, &budget_config)?;
424 let chunk_size = result.included_items;
425
426 let (chunk_items, is_chunk_request) = self.slice_for_chunk(&comments, chunk_size);
427 if is_chunk_request {
428 let content = match self.config.format {
429 OutputFormat::Json => serde_json::to_string_pretty(chunk_items)?,
430 OutputFormat::Toon => toon::encode_comments(chunk_items)?,
431 OutputFormat::Mckp => encode_mckp(chunk_items)?,
432 };
433 let mut output = TransformOutput::new(content).with_raw_chars(raw_chars);
434 output.included_count = chunk_items.len();
435 output.total_count = Some(total);
436 return Ok(output);
437 }
438
439 let json_fallback = self.json_fallback(&full_content);
440 let index = page_index::build_comments_index(&comments, result.included_items);
441 self.build_budget_output(
442 result,
443 raw_chars,
444 total,
445 "comments",
446 Some(index),
447 json_fallback,
448 )
449 }
450
451 pub fn transform_discussions(&self, discussions: Vec<Discussion>) -> Result<TransformOutput> {
453 let total = discussions.len();
454 let raw_json = serde_json::to_string(&discussions)?;
455 let raw_chars = raw_json.len();
456
457 let full_content = match self.config.format {
458 OutputFormat::Json => serde_json::to_string_pretty(&discussions)?,
459 OutputFormat::Toon => toon::encode_discussions(&discussions)?,
460 OutputFormat::Mckp => encode_mckp(&discussions)?,
461 };
462
463 if self.config.max_chars == 0 || full_content.len() <= self.config.max_chars {
464 let mut output = TransformOutput::new(full_content).with_raw_chars(raw_chars);
465 output.included_count = total;
466 return Ok(output);
467 }
468
469 let budget_config = self.budget_config();
470 let strategy_kind = self.resolve_strategy("get_merge_request_discussions");
471 let result = budget::process_discussions(&discussions, strategy_kind, &budget_config)?;
472 let chunk_size = result.included_items;
473
474 let (chunk_items, is_chunk_request) = self.slice_for_chunk(&discussions, chunk_size);
475 if is_chunk_request {
476 let content = match self.config.format {
477 OutputFormat::Json => serde_json::to_string_pretty(chunk_items)?,
478 OutputFormat::Toon => toon::encode_discussions(chunk_items)?,
479 OutputFormat::Mckp => encode_mckp(chunk_items)?,
480 };
481 let mut output = TransformOutput::new(content).with_raw_chars(raw_chars);
482 output.included_count = chunk_items.len();
483 output.total_count = Some(total);
484 return Ok(output);
485 }
486
487 let json_fallback = self.json_fallback(&full_content);
488 let index = page_index::build_discussions_index(&discussions, result.included_items);
489 self.build_budget_output(
490 result,
491 raw_chars,
492 total,
493 "discussions",
494 Some(index),
495 json_fallback,
496 )
497 }
498
499 fn json_fallback(&self, content: &str) -> Option<String> {
502 if matches!(self.config.format, OutputFormat::Json) {
503 Some(content.to_string())
504 } else {
505 None
506 }
507 }
508
509 fn slice_for_chunk<'a, T>(&self, items: &'a [T], chunk_size: usize) -> (&'a [T], bool) {
516 match self.config.chunk {
517 Some(n) if n > 1 && chunk_size > 0 => {
518 let offset = (n - 1) * chunk_size;
519 if offset >= items.len() {
520 (&[], true) } else {
522 let end = (offset + chunk_size).min(items.len());
523 (&items[offset..end], true)
524 }
525 }
526 _ => (items, false),
527 }
528 }
529
530 fn budget_config(&self) -> BudgetConfig {
532 BudgetConfig {
533 budget_tokens: estimate_tokens_from_chars(self.config.max_chars),
534 ..Default::default()
535 }
536 }
537
538 fn resolve_strategy(&self, default_tool: &str) -> strategy::TrimStrategyKind {
540 let resolver = StrategyResolver::new();
541 let tool = self.config.tool_name.as_deref().unwrap_or(default_tool);
542 resolver.resolve(tool)
543 }
544
545 fn build_budget_output(
553 &self,
554 result: budget::BudgetResult,
555 raw_chars: usize,
556 total: usize,
557 item_type: &str,
558 index: Option<page_index::PageIndex>,
559 json_fallback: Option<String>,
560 ) -> Result<TransformOutput> {
561 let content = if matches!(self.config.format, OutputFormat::Json) {
563 if let Some(json) = json_fallback {
564 truncation::truncate_string(&json, self.config.max_chars)
565 } else {
566 result.content
567 }
568 } else {
569 result.content
570 };
571
572 let mut output = TransformOutput::new(content).with_raw_chars(raw_chars);
573 output.included_count = result.included_items;
574
575 if result.trimmed {
577 output.truncated = true;
578 output.total_count = Some(total);
579
580 if self.config.include_hints {
581 if let Some(idx) = index {
582 if idx.total_pages > 1 {
583 let hint = format!(
584 "Chunk 1/{}: {} most relevant {} (by priority). {} total items across {} chunks. \
585 Use `chunk: N` parameter to fetch a specific chunk, or request all remaining data.",
586 idx.total_pages,
587 result.included_items,
588 item_type,
589 total,
590 idx.total_pages
591 );
592 output.page_index = Some(idx);
593 output.agent_hint = Some(hint);
594 } else {
595 let remaining = total.saturating_sub(result.included_items);
596 output.agent_hint = Some(format!(
597 "Showing {}/{} {}. {} items trimmed by budget.",
598 result.included_items, total, item_type, remaining
599 ));
600 }
601 } else {
602 let remaining = total.saturating_sub(result.included_items);
603 output.agent_hint = Some(format!(
604 "Showing {}/{} {}. {} items trimmed by budget. Use `chunk: N` parameter to fetch a specific chunk.",
605 result.included_items, total, item_type, remaining
606 ));
607 }
608 }
609 }
610
611 Ok(output)
612 }
613}
614
615impl Default for Pipeline {
616 fn default() -> Self {
617 Self::new()
618 }
619}
620
#[cfg(test)]
mod tests {
    use super::*;
    use devboy_core::User;

    /// Builds 25 small GitHub-style issues keyed `gh#1` .. `gh#25`.
    fn sample_issues() -> Vec<Issue> {
        (1..=25)
            .map(|i| Issue {
                key: format!("gh#{}", i),
                title: format!("Issue {}", i),
                description: Some(format!("Description for issue {}", i)),
                state: "open".to_string(),
                source: "github".to_string(),
                priority: None,
                labels: vec!["bug".to_string()],
                author: Some(User {
                    id: "1".to_string(),
                    username: "test".to_string(),
                    name: None,
                    email: None,
                    avatar_url: None,
                }),
                assignees: vec![],
                url: Some(format!("https://github.com/test/repo/issues/{}", i)),
                created_at: Some("2024-01-01T00:00:00Z".to_string()),
                updated_at: Some("2024-01-02T00:00:00Z".to_string()),
                attachments_count: None,
                parent: None,
                subtasks: vec![],
                custom_fields: std::collections::HashMap::new(),
            })
            .collect()
    }

    /// Builds 5 GitLab-style merge requests keyed `mr#1` .. `mr#5`.
    fn sample_merge_requests() -> Vec<MergeRequest> {
        (1..=5)
            .map(|i| MergeRequest {
                key: format!("mr#{}", i),
                title: format!("MR {}", i),
                description: Some(format!("MR description {}", i)),
                state: "opened".to_string(),
                source: "gitlab".to_string(),
                source_branch: format!("feature-{}", i),
                target_branch: "main".to_string(),
                author: None,
                assignees: vec![],
                reviewers: vec![],
                labels: vec![],
                url: Some(format!(
                    "https://gitlab.com/test/repo/-/merge_requests/{}",
                    i
                )),
                created_at: Some("2024-01-01T00:00:00Z".to_string()),
                updated_at: Some("2024-01-02T00:00:00Z".to_string()),
                draft: false,
            })
            .collect()
    }

    /// Builds 5 two-line file diffs; only the first is flagged as a new file.
    fn sample_diffs() -> Vec<FileDiff> {
        (1..=5)
            .map(|i| FileDiff {
                file_path: format!("src/file_{}.rs", i),
                old_path: None,
                new_file: i == 1,
                deleted_file: false,
                renamed_file: false,
                diff: format!("+added line {}\n-removed line {}", i, i),
                additions: Some(1),
                deletions: Some(1),
            })
            .collect()
    }

    /// Builds 5 author-less comments.
    fn sample_comments() -> Vec<Comment> {
        (1..=5)
            .map(|i| Comment {
                id: format!("{}", i),
                body: format!("Comment body {}", i),
                author: None,
                created_at: Some("2024-01-01T00:00:00Z".to_string()),
                updated_at: None,
                position: None,
            })
            .collect()
    }

    /// Builds 5 single-comment discussions; even-numbered ones are resolved.
    fn sample_discussions() -> Vec<Discussion> {
        (1..=5)
            .map(|i| Discussion {
                id: format!("{}", i),
                resolved: i % 2 == 0,
                resolved_by: None,
                comments: vec![Comment {
                    id: format!("c{}", i),
                    body: format!("Discussion comment {}", i),
                    author: None,
                    created_at: None,
                    updated_at: None,
                    position: None,
                }],
                position: None,
            })
            .collect()
    }

    // --- Truncation basics ---

    #[test]
    fn test_pipeline_truncates_items() {
        let pipeline = Pipeline::with_config(PipelineConfig {
            max_chars: 200,
            ..Default::default()
        });

        let issues = sample_issues();
        let output = pipeline.transform_issues(issues).unwrap();

        assert!(output.truncated);
        assert_eq!(output.total_count, Some(25));
        assert!(output.included_count < 25);
        assert!(output.agent_hint.is_some());
    }

    #[test]
    fn test_pipeline_no_truncation_when_under_limit() {
        let pipeline = Pipeline::with_config(PipelineConfig {
            max_chars: 100_000,
            ..Default::default()
        });

        let issues: Vec<Issue> = sample_issues().into_iter().take(5).collect();
        let output = pipeline.transform_issues(issues).unwrap();

        assert!(!output.truncated);
        assert!(output.agent_hint.is_none());
    }

    // --- TOON format ---

    #[test]
    fn test_toon_format_issues() {
        let pipeline = Pipeline::with_config(PipelineConfig {
            format: OutputFormat::Toon,
            max_chars: 100_000,
            ..Default::default()
        });

        let issues: Vec<Issue> = sample_issues().into_iter().take(3).collect();
        let output = pipeline.transform_issues(issues).unwrap();

        assert!(output.content.contains("gh#1"));
        assert!(output.content.contains("Issue 1"));
    }

    #[test]
    fn test_toon_format_merge_requests() {
        let pipeline = Pipeline::with_config(PipelineConfig {
            format: OutputFormat::Toon,
            max_chars: 500,
            ..Default::default()
        });

        let mrs = sample_merge_requests();
        let output = pipeline.transform_merge_requests(mrs).unwrap();

        assert!(output.content.contains("mr#1"));
        assert!(output.content.contains("MR 1"));
        assert!(output.truncated);
        assert!(output.included_count < 5);
    }

    #[test]
    fn test_toon_format_diffs() {
        let pipeline = Pipeline::with_config(PipelineConfig {
            format: OutputFormat::Toon,
            max_chars: 200,
            ..Default::default()
        });

        let diffs = sample_diffs();
        let output = pipeline.transform_diffs(diffs).unwrap();

        assert!(output.content.contains("src/file_1.rs"));
        assert!(output.truncated);
        assert!(output.included_count < 5);
    }

    #[test]
    fn test_toon_format_comments() {
        let pipeline = Pipeline::with_config(PipelineConfig {
            format: OutputFormat::Toon,
            max_chars: 300,
            ..Default::default()
        });

        let comments = sample_comments();
        let output = pipeline.transform_comments(comments).unwrap();

        assert!(output.content.contains("Comment body"));
        assert!(output.truncated);
        assert!(output.included_count < 5);
    }

    #[test]
    fn test_toon_format_discussions() {
        let pipeline = Pipeline::with_config(PipelineConfig {
            format: OutputFormat::Toon,
            max_chars: 500,
            ..Default::default()
        });

        let discussions = sample_discussions();
        let output = pipeline.transform_discussions(discussions).unwrap();

        assert!(output.content.contains("Discussion comment 1"));
        assert!(output.truncated);
        assert!(output.included_count < 5);
    }

    // --- JSON format: untrimmed output must parse back into the same types ---

    #[test]
    fn test_json_format_issues() {
        let pipeline = Pipeline::with_config(PipelineConfig {
            format: OutputFormat::Json,
            max_chars: 100_000,
            ..Default::default()
        });

        let issues: Vec<Issue> = sample_issues().into_iter().take(2).collect();
        let output = pipeline.transform_issues(issues).unwrap();

        let parsed: Vec<Issue> = serde_json::from_str(&output.content).unwrap();
        assert_eq!(parsed.len(), 2);
    }

    #[test]
    fn test_json_format_merge_requests() {
        let pipeline = Pipeline::with_config(PipelineConfig {
            format: OutputFormat::Json,
            max_chars: 100_000,
            ..Default::default()
        });

        let mrs: Vec<MergeRequest> = sample_merge_requests().into_iter().take(2).collect();
        let output = pipeline.transform_merge_requests(mrs).unwrap();

        let parsed: Vec<MergeRequest> = serde_json::from_str(&output.content).unwrap();
        assert_eq!(parsed.len(), 2);
    }

    #[test]
    fn test_json_format_diffs() {
        let pipeline = Pipeline::with_config(PipelineConfig {
            format: OutputFormat::Json,
            max_chars: 100_000,
            ..Default::default()
        });

        let diffs: Vec<FileDiff> = sample_diffs().into_iter().take(2).collect();
        let output = pipeline.transform_diffs(diffs).unwrap();

        let parsed: Vec<FileDiff> = serde_json::from_str(&output.content).unwrap();
        assert_eq!(parsed.len(), 2);
    }

    #[test]
    fn test_json_format_comments() {
        let pipeline = Pipeline::with_config(PipelineConfig {
            format: OutputFormat::Json,
            max_chars: 100_000,
            ..Default::default()
        });

        let comments: Vec<Comment> = sample_comments().into_iter().take(2).collect();
        let output = pipeline.transform_comments(comments).unwrap();

        let parsed: Vec<Comment> = serde_json::from_str(&output.content).unwrap();
        assert_eq!(parsed.len(), 2);
    }

    #[test]
    fn test_json_format_discussions() {
        let pipeline = Pipeline::with_config(PipelineConfig {
            format: OutputFormat::Json,
            max_chars: 100_000,
            ..Default::default()
        });

        let discussions: Vec<Discussion> = sample_discussions().into_iter().take(2).collect();
        let output = pipeline.transform_discussions(discussions).unwrap();

        let parsed: Vec<Discussion> = serde_json::from_str(&output.content).unwrap();
        assert_eq!(parsed.len(), 2);
    }

    // --- TransformOutput helpers ---

    #[test]
    fn test_transform_output_to_string_with_hints() {
        let output = TransformOutput::new("content".to_string());
        assert_eq!(output.to_string_with_hints(), "content");

        let output = TransformOutput::new("content".to_string()).with_truncation(
            10,
            5,
            "hint text".to_string(),
        );
        assert!(output.to_string_with_hints().contains("content"));
        assert!(output.to_string_with_hints().contains("hint text"));
    }

    #[test]
    fn test_transform_output_with_truncation() {
        let output =
            TransformOutput::new("data".into()).with_truncation(100, 10, "90 more items".into());
        assert!(output.truncated);
        assert_eq!(output.total_count, Some(100));
        assert_eq!(output.included_count, 10);
        assert_eq!(output.agent_hint.as_deref(), Some("90 more items"));
    }

    // --- Configuration ---

    #[test]
    fn test_pipeline_config_default_values() {
        let config = PipelineConfig::default();
        assert_eq!(config.max_chars, 100_000);
        assert_eq!(config.max_chars_per_item, 10_000);
        assert_eq!(config.max_description_len, 10_000);
        assert!(matches!(config.format, OutputFormat::Toon));
        assert!(config.include_hints);
    }

    #[test]
    fn test_pipeline_default() {
        let pipeline = Pipeline::default();
        let issues: Vec<Issue> = sample_issues().into_iter().take(1).collect();
        let output = pipeline.transform_issues(issues).unwrap();
        assert!(!output.content.is_empty());
    }

    #[test]
    fn test_pipeline_hints_disabled() {
        let pipeline = Pipeline::with_config(PipelineConfig {
            max_chars: 200,
            include_hints: false,
            ..Default::default()
        });

        let issues = sample_issues();
        let output = pipeline.transform_issues(issues).unwrap();

        assert!(output.included_count < 25);
        assert!(output.truncated);
        // With hints disabled neither the hint nor the page index is attached.
        assert!(output.agent_hint.is_none());
        assert!(output.page_index.is_none());
    }

    // --- Character limit ---

    #[test]
    fn test_char_limit_applied() {
        let pipeline = Pipeline::with_config(PipelineConfig {
            max_chars: 100,
            ..Default::default()
        });

        let issues = sample_issues();
        let output = pipeline.transform_issues(issues).unwrap();

        assert!(output.truncated);
    }

    #[test]
    fn test_char_limit_triggers_trimming() {
        let pipeline = Pipeline::with_config(PipelineConfig {
            max_chars: 50,
            ..Default::default()
        });

        let issues: Vec<Issue> = sample_issues().into_iter().take(3).collect();
        let output = pipeline.transform_issues(issues).unwrap();
        assert!(output.truncated);
    }

    // --- Empty input ---

    #[test]
    fn test_transform_empty_issues() {
        let pipeline = Pipeline::new();
        let output = pipeline.transform_issues(vec![]).unwrap();
        assert!(!output.truncated);
        assert_eq!(output.included_count, 0);
    }

    #[test]
    fn test_transform_empty_merge_requests() {
        let pipeline = Pipeline::new();
        let output = pipeline.transform_merge_requests(vec![]).unwrap();
        assert!(!output.truncated);
        assert_eq!(output.included_count, 0);
    }

    #[test]
    fn test_transform_empty_diffs() {
        let pipeline = Pipeline::new();
        let output = pipeline.transform_diffs(vec![]).unwrap();
        assert!(!output.truncated);
        assert_eq!(output.included_count, 0);
    }

    #[test]
    fn test_transform_empty_comments() {
        let pipeline = Pipeline::new();
        let output = pipeline.transform_comments(vec![]).unwrap();
        assert!(!output.truncated);
        assert_eq!(output.included_count, 0);
    }

    #[test]
    fn test_transform_empty_discussions() {
        let pipeline = Pipeline::new();
        let output = pipeline.transform_discussions(vec![]).unwrap();
        assert!(!output.truncated);
        assert_eq!(output.included_count, 0);
    }

    // --- Per-item diff truncation ---

    #[test]
    fn test_diff_content_truncated_per_item() {
        let pipeline = Pipeline::with_config(PipelineConfig {
            max_chars_per_item: 10,
            max_chars: 100_000,
            ..Default::default()
        });

        let diffs = vec![FileDiff {
            file_path: "big.rs".into(),
            old_path: None,
            new_file: false,
            deleted_file: false,
            renamed_file: false,
            diff: "x".repeat(1000),
            additions: Some(100),
            deletions: Some(0),
        }];

        let output = pipeline.transform_diffs(diffs).unwrap();
        assert!(output.content.len() < 1000);
    }

    // --- JSON format combined with budget trimming ---

    #[test]
    fn test_json_format_with_budget_trimming_issues() {
        let pipeline = Pipeline::with_config(PipelineConfig {
            format: OutputFormat::Json,
            max_chars: 200,
            ..Default::default()
        });

        let issues = sample_issues();
        let output = pipeline.transform_issues(issues).unwrap();

        assert!(output.truncated);
        assert!(output.included_count < 25);
        assert!(!output.content.is_empty());
    }

    #[test]
    fn test_json_format_with_budget_trimming_merge_requests() {
        let pipeline = Pipeline::with_config(PipelineConfig {
            format: OutputFormat::Json,
            max_chars: 200,
            ..Default::default()
        });

        let mrs = sample_merge_requests();
        let output = pipeline.transform_merge_requests(mrs).unwrap();

        assert!(output.truncated);
        assert!(!output.content.is_empty());
    }

    #[test]
    fn test_json_format_with_budget_trimming_diffs() {
        let pipeline = Pipeline::with_config(PipelineConfig {
            format: OutputFormat::Json,
            max_chars: 100,
            ..Default::default()
        });

        let diffs = sample_diffs();
        let output = pipeline.transform_diffs(diffs).unwrap();

        assert!(output.truncated);
        assert!(!output.content.is_empty());
    }

    #[test]
    fn test_json_format_with_budget_trimming_comments() {
        let pipeline = Pipeline::with_config(PipelineConfig {
            format: OutputFormat::Json,
            max_chars: 100,
            ..Default::default()
        });

        let comments = sample_comments();
        let output = pipeline.transform_comments(comments).unwrap();

        assert!(output.truncated);
        assert!(!output.content.is_empty());
    }

    #[test]
    fn test_json_format_with_budget_trimming_discussions() {
        let pipeline = Pipeline::with_config(PipelineConfig {
            format: OutputFormat::Json,
            max_chars: 100,
            ..Default::default()
        });

        let discussions = sample_discussions();
        let output = pipeline.transform_discussions(discussions).unwrap();

        assert!(output.truncated);
        assert!(!output.content.is_empty());
    }

    // --- Chunk index and hints ---

    #[test]
    fn test_pipeline_chunk_index_with_many_issues() {
        let issues: Vec<Issue> = (1..=50)
            .map(|i| Issue {
                key: format!("gh#{}", i),
                title: format!("Issue {} with a moderately long title for sizing", i),
                description: Some(format!(
                    "Description for issue {} with substantial content to inflate token count significantly beyond budget",
                    i
                )),
                state: "open".to_string(),
                source: "github".to_string(),
                priority: None,
                labels: vec!["bug".to_string(), "critical".to_string()],
                author: Some(User {
                    id: "1".to_string(),
                    username: "test".to_string(),
                    name: None,
                    email: None,
                    avatar_url: None,
                }),
                assignees: vec![],
                url: Some(format!("https://github.com/test/repo/issues/{}", i)),
                created_at: Some("2024-01-01T00:00:00Z".to_string()),
                updated_at: Some("2024-01-02T00:00:00Z".to_string()),
                attachments_count: None,
                parent: None,
                subtasks: vec![],
                custom_fields: std::collections::HashMap::new(),
            })
            .collect();

        let pipeline = Pipeline::with_config(PipelineConfig {
            max_chars: 500,
            include_hints: true,
            ..Default::default()
        });

        let output = pipeline.transform_issues(issues).unwrap();

        assert!(output.truncated);
        assert!(output.included_count < 50);

        // Hints are enabled and the output is truncated, so a hint must be
        // present; a missing hint should fail the test instead of skipping it.
        let hint = output
            .agent_hint
            .as_deref()
            .expect("truncated output with hints enabled must carry an agent hint");
        assert!(
            hint.contains("Chunk") || hint.contains("Showing"),
            "Expected chunk or showing hint, got: {}",
            hint
        );
    }

    // --- Format size comparisons ---

    #[test]
    fn test_toon_smaller_than_json_for_issues() {
        let issues: Vec<Issue> = sample_issues().into_iter().take(10).collect();

        let json_pipeline = Pipeline::with_config(PipelineConfig {
            format: OutputFormat::Json,
            max_chars: 1_000_000,
            ..Default::default()
        });
        let toon_pipeline = Pipeline::with_config(PipelineConfig {
            format: OutputFormat::Toon,
            max_chars: 1_000_000,
            ..Default::default()
        });

        let json_output = json_pipeline.transform_issues(issues.clone()).unwrap();
        let toon_output = toon_pipeline.transform_issues(issues).unwrap();

        assert!(
            toon_output.content.len() < json_output.content.len(),
            "TOON ({}) should be smaller than JSON ({})",
            toon_output.content.len(),
            json_output.content.len()
        );
    }

    #[test]
    fn test_mckp_routes_issues_through_inner_table() {
        let issues: Vec<Issue> = sample_issues().into_iter().take(10).collect();

        let mckp_pipeline = Pipeline::with_config(PipelineConfig {
            format: OutputFormat::Mckp,
            max_chars: 1_000_000,
            ..Default::default()
        });
        let json_pipeline = Pipeline::with_config(PipelineConfig {
            format: OutputFormat::Json,
            max_chars: 1_000_000,
            ..Default::default()
        });

        let mckp_out = mckp_pipeline.transform_issues(issues.clone()).unwrap();
        let json_out = json_pipeline.transform_issues(issues).unwrap();

        assert!(
            mckp_out.content.len() < json_out.content.len(),
            "MCKP ({}) should be smaller than JSON ({})",
            mckp_out.content.len(),
            json_out.content.len(),
        );
        for k in ["key", "title", "state", "source"] {
            // The preview is cut on character boundaries so that building the
            // failure message can never panic on multi-byte content.
            assert!(
                mckp_out.content.contains(k),
                "MCKP output is missing field `{k}`: {}",
                mckp_out.content.chars().take(200).collect::<String>()
            );
        }
    }

    #[test]
    fn test_mckp_falls_back_to_pretty_json_on_unstable_keys() {
        let issues: Vec<Issue> = sample_issues().into_iter().take(1).collect();
        let mckp_pipeline = Pipeline::with_config(PipelineConfig {
            format: OutputFormat::Mckp,
            max_chars: 1_000_000,
            ..Default::default()
        });
        let out = mckp_pipeline.transform_issues(issues).unwrap();
        assert!(out.content.contains("gh#1"));
    }
}