1#![deny(rustdoc::broken_intra_doc_links)]
26#![deny(rustdoc::private_intra_doc_links)]
27#![deny(rustdoc::invalid_html_tags)]
28pub mod adaptive_config;
29pub mod budget;
30pub mod dedup;
31pub(crate) mod dedup_util;
32pub mod enrichment;
33pub mod layered_pipeline;
34pub mod mckp_router;
35pub mod near_ref;
36pub mod page_index;
37pub mod pagination;
38pub mod projection;
39pub mod round_trip;
40pub mod shape;
41pub mod strategy;
42pub mod telemetry;
43pub mod templates;
44pub mod token_counter;
45pub mod tool_defaults;
46pub mod toon;
47pub mod tree;
48pub mod trim;
49pub mod truncation;
50
51pub use token_counter::{Tokenizer, estimate_tokens, tokens_to_chars};
52pub use truncation::TruncationPlugin;
53
54use devboy_core::{Comment, Discussion, FileDiff, Issue, MergeRequest, Result};
55
56use budget::BudgetConfig;
57use strategy::StrategyResolver;
58
/// Converts a character budget into an approximate token budget.
///
/// Uses a fixed heuristic of 3.5 characters per token and rounds up, so any
/// non-zero character count yields at least one token.
fn estimate_tokens_from_chars(chars: usize) -> usize {
    // Heuristic ratio used for the conversion.
    const CHARS_PER_TOKEN: f64 = 3.5;

    let fractional_tokens = chars as f64 / CHARS_PER_TOKEN;
    fractional_tokens.ceil() as usize
}
63
64fn encode_mckp<T: serde::Serialize>(items: &[T]) -> Result<String> {
69 let json = serde_json::to_string_pretty(items)?;
70 let cls = shape::classify(&json);
71 let cfg = adaptive_config::MckpConfig::default();
72 if let Some((_id, body)) = mckp_router::route(&cfg, &json, &cls) {
73 Ok(body)
74 } else {
75 Ok(json)
76 }
77}
78
/// Result of running a collection through the pipeline: the rendered text
/// plus bookkeeping about what was included and how large it was.
#[derive(Debug, Clone)]
pub struct TransformOutput {
    /// The rendered payload text.
    pub content: String,
    /// `true` when items were dropped; set by `with_truncation` and by the
    /// pipeline when the budget pass reports trimming.
    pub truncated: bool,
    /// Total number of items in the input, when recorded. `None` by default;
    /// filled in for truncated and chunked outputs.
    pub total_count: Option<usize>,
    /// Number of items represented in `content`.
    pub included_count: usize,
    /// Optional guidance text; `to_string_with_hints` appends it after the
    /// content.
    pub agent_hint: Option<String>,
    /// NOTE(review): never populated by the code in this file (always starts
    /// as `None`); presumably a continuation cursor — confirm against callers.
    pub page_cursor: Option<String>,
    /// Optional page index; `to_string_with_hints` renders it (via `to_toon`)
    /// before the content.
    pub page_index: Option<page_index::PageIndex>,
    /// NOTE(review): not written by this file; presumably pagination metadata
    /// reported by the upstream provider — confirm.
    pub provider_pagination: Option<devboy_core::Pagination>,
    /// NOTE(review): not written by this file; presumably sort metadata
    /// reported by the upstream provider — confirm.
    pub provider_sort: Option<devboy_core::SortInfo>,
    /// Size of the raw input as supplied through `with_raw_chars`. The
    /// pipeline passes the byte length of the compact JSON serialisation.
    pub raw_chars: usize,
    /// `content.len()` (byte length) captured when the output was constructed.
    pub output_chars: usize,
    /// NOTE(review): initialised to `0` and not updated anywhere in this
    /// file; presumably filled in by a later stage — confirm.
    pub pre_trim_chars: usize,
}
110
111impl TransformOutput {
112 pub fn new(content: String) -> Self {
114 let output_chars = content.len();
115 Self {
116 content,
117 truncated: false,
118 total_count: None,
119 included_count: 0,
120 agent_hint: None,
121 page_cursor: None,
122 page_index: None,
123 provider_pagination: None,
124 provider_sort: None,
125 raw_chars: 0,
126 output_chars,
127 pre_trim_chars: 0,
128 }
129 }
130
131 pub fn with_raw_chars(mut self, raw_chars: usize) -> Self {
133 self.raw_chars = raw_chars;
134 self
135 }
136
137 pub fn with_truncation(mut self, total: usize, included: usize, hint: String) -> Self {
139 self.truncated = true;
140 self.total_count = Some(total);
141 self.included_count = included;
142 self.agent_hint = Some(hint);
143 self
144 }
145
146 pub fn to_string_with_hints(&self) -> String {
148 let mut parts = Vec::new();
149
150 if let Some(index) = &self.page_index {
152 parts.push(index.to_toon());
153 }
154
155 parts.push(self.content.clone());
157
158 if let Some(hint) = &self.agent_hint {
160 parts.push(hint.clone());
161 }
162
163 parts.join("\n\n")
164 }
165}
166
/// Settings that control how a `Pipeline` renders and trims its input.
#[derive(Debug, Clone)]
pub struct PipelineConfig {
    /// Overall size limit for the rendered content, compared against the
    /// content's byte length. `0` disables the limit. Also converted into the
    /// token budget handed to the budget pass.
    pub max_chars: usize,
    /// Per-item limit; `transform_diffs` truncates each diff body to this
    /// size before rendering.
    pub max_chars_per_item: usize,
    /// NOTE(review): not read anywhere in this file; presumably a cap on
    /// description fields applied elsewhere — confirm.
    pub max_description_len: usize,
    /// Output encoding used for the rendered content.
    pub format: OutputFormat,
    /// When `true`, truncated outputs carry an agent hint (and a page index
    /// when there is more than one page).
    pub include_hints: bool,
    /// NOTE(review): not read anywhere in this file; presumably a pagination
    /// cursor consumed elsewhere — confirm.
    pub page_cursor: Option<String>,
    /// Tool name used to resolve the trimming strategy. When `None`, each
    /// `transform_*` method falls back to its own default tool name.
    pub tool_name: Option<String>,
    /// Chunk number to return when the content exceeds the budget. Values
    /// greater than `1` select a later chunk; `None`, `0` and `1` produce the
    /// regular budget-trimmed first chunk.
    pub chunk: Option<usize>,
}
188
189impl Default for PipelineConfig {
190 fn default() -> Self {
191 Self {
192 max_chars: 100_000,
193 max_chars_per_item: 10_000,
194 max_description_len: 10_000,
195 format: OutputFormat::Toon,
196 include_hints: true,
197 page_cursor: None,
198 tool_name: None,
199 chunk: None,
200 }
201 }
202}
203
/// Encoding used for the rendered content.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OutputFormat {
    /// Rendered with the type-specific encoders in the `toon` module. This is
    /// the default format of `PipelineConfig`.
    Toon,
    /// Rendered as pretty-printed JSON via `serde_json`.
    Json,
    /// Rendered through `encode_mckp`: pretty JSON offered to the MCKP
    /// router, falling back to the pretty JSON when no route applies.
    Mckp,
}
221
/// Renders collections of provider items into size-limited text according to
/// a `PipelineConfig`.
pub struct Pipeline {
    /// Settings applied to every `transform_*` call on this pipeline.
    config: PipelineConfig,
}
226
227impl Pipeline {
228 pub fn new() -> Self {
230 Self {
231 config: PipelineConfig::default(),
232 }
233 }
234
235 pub fn with_config(config: PipelineConfig) -> Self {
237 Self { config }
238 }
239
240 pub fn transform_issues(&self, issues: Vec<Issue>) -> Result<TransformOutput> {
242 let total = issues.len();
243 let raw_json = serde_json::to_string(&issues)?;
244 let raw_chars = raw_json.len();
245
246 let full_content = match self.config.format {
248 OutputFormat::Json => serde_json::to_string_pretty(&issues)?,
249 OutputFormat::Toon => toon::encode_issues(&issues, toon::TrimLevel::Full)?,
250 OutputFormat::Mckp => encode_mckp(&issues)?,
251 };
252
253 if self.config.max_chars == 0 || full_content.len() <= self.config.max_chars {
254 let mut output = TransformOutput::new(full_content).with_raw_chars(raw_chars);
255 output.included_count = total;
256 return Ok(output);
257 }
258
259 let budget_config = self.budget_config();
261 let strategy_kind = self.resolve_strategy("get_issues");
262 let result = budget::process_issues(&issues, strategy_kind, &budget_config)?;
263 let chunk_size = result.included_items;
264
265 let (chunk_items, is_chunk_request) = self.slice_for_chunk(&issues, chunk_size);
267 if is_chunk_request {
268 let content = match self.config.format {
269 OutputFormat::Json => serde_json::to_string_pretty(chunk_items)?,
270 OutputFormat::Toon => toon::encode_issues(chunk_items, toon::TrimLevel::Full)?,
271 OutputFormat::Mckp => encode_mckp(chunk_items)?,
272 };
273 let mut output = TransformOutput::new(content).with_raw_chars(raw_chars);
274 output.included_count = chunk_items.len();
275 output.total_count = Some(total);
276 return Ok(output);
277 }
278
279 let json_fallback = self.json_fallback(&full_content);
281 let index = page_index::build_issues_index(&issues, result.included_items);
282 self.build_budget_output(
283 result,
284 raw_chars,
285 total,
286 "issues",
287 Some(index),
288 json_fallback,
289 )
290 }
291
292 pub fn transform_merge_requests(&self, mrs: Vec<MergeRequest>) -> Result<TransformOutput> {
294 let total = mrs.len();
295 let raw_json = serde_json::to_string(&mrs)?;
296 let raw_chars = raw_json.len();
297
298 let full_content = match self.config.format {
299 OutputFormat::Json => serde_json::to_string_pretty(&mrs)?,
300 OutputFormat::Toon => toon::encode_merge_requests(&mrs, toon::TrimLevel::Full)?,
301 OutputFormat::Mckp => encode_mckp(&mrs)?,
302 };
303
304 if self.config.max_chars == 0 || full_content.len() <= self.config.max_chars {
305 let mut output = TransformOutput::new(full_content).with_raw_chars(raw_chars);
306 output.included_count = total;
307 return Ok(output);
308 }
309
310 let budget_config = self.budget_config();
311 let strategy_kind = self.resolve_strategy("get_merge_requests");
312 let result = budget::process_merge_requests(&mrs, strategy_kind, &budget_config)?;
313 let chunk_size = result.included_items;
314
315 let (chunk_items, is_chunk_request) = self.slice_for_chunk(&mrs, chunk_size);
316 if is_chunk_request {
317 let content = match self.config.format {
318 OutputFormat::Json => serde_json::to_string_pretty(chunk_items)?,
319 OutputFormat::Toon => {
320 toon::encode_merge_requests(chunk_items, toon::TrimLevel::Full)?
321 }
322 OutputFormat::Mckp => encode_mckp(chunk_items)?,
323 };
324 let mut output = TransformOutput::new(content).with_raw_chars(raw_chars);
325 output.included_count = chunk_items.len();
326 output.total_count = Some(total);
327 return Ok(output);
328 }
329
330 let json_fallback = self.json_fallback(&full_content);
331 let index = page_index::build_merge_requests_index(&mrs, result.included_items);
332 self.build_budget_output(
333 result,
334 raw_chars,
335 total,
336 "merge_requests",
337 Some(index),
338 json_fallback,
339 )
340 }
341
342 pub fn transform_diffs(&self, diffs: Vec<FileDiff>) -> Result<TransformOutput> {
347 let total = diffs.len();
348
349 let diffs: Vec<FileDiff> = diffs
351 .into_iter()
352 .map(|mut d| {
353 d.diff = truncation::truncate_string(&d.diff, self.config.max_chars_per_item);
354 d
355 })
356 .collect();
357
358 let raw_json = serde_json::to_string(&diffs)?;
359 let raw_chars = raw_json.len();
360
361 let full_content = match self.config.format {
362 OutputFormat::Json => serde_json::to_string_pretty(&diffs)?,
363 OutputFormat::Toon => toon::encode_diffs(&diffs)?,
364 OutputFormat::Mckp => encode_mckp(&diffs)?,
365 };
366
367 if self.config.max_chars == 0 || full_content.len() <= self.config.max_chars {
368 let mut output = TransformOutput::new(full_content).with_raw_chars(raw_chars);
369 output.included_count = total;
370 return Ok(output);
371 }
372
373 let budget_config = self.budget_config();
374 let strategy_kind = self.resolve_strategy("get_merge_request_diffs");
375 let result = budget::process_diffs(&diffs, strategy_kind, &budget_config)?;
376 let chunk_size = result.included_items;
377
378 let (chunk_items, is_chunk_request) = self.slice_for_chunk(&diffs, chunk_size);
379 if is_chunk_request {
380 let content = match self.config.format {
381 OutputFormat::Json => serde_json::to_string_pretty(chunk_items)?,
382 OutputFormat::Toon => toon::encode_diffs(chunk_items)?,
383 OutputFormat::Mckp => encode_mckp(chunk_items)?,
384 };
385 let mut output = TransformOutput::new(content).with_raw_chars(raw_chars);
386 output.included_count = chunk_items.len();
387 output.total_count = Some(total);
388 return Ok(output);
389 }
390
391 let json_fallback = self.json_fallback(&full_content);
392 let index = page_index::build_diffs_index(&diffs, result.included_items);
393 self.build_budget_output(
394 result,
395 raw_chars,
396 total,
397 "diffs",
398 Some(index),
399 json_fallback,
400 )
401 }
402
403 pub fn transform_comments(&self, comments: Vec<Comment>) -> Result<TransformOutput> {
405 let total = comments.len();
406 let raw_json = serde_json::to_string(&comments)?;
407 let raw_chars = raw_json.len();
408
409 let full_content = match self.config.format {
410 OutputFormat::Json => serde_json::to_string_pretty(&comments)?,
411 OutputFormat::Toon => toon::encode_comments(&comments)?,
412 OutputFormat::Mckp => encode_mckp(&comments)?,
413 };
414
415 if self.config.max_chars == 0 || full_content.len() <= self.config.max_chars {
416 let mut output = TransformOutput::new(full_content).with_raw_chars(raw_chars);
417 output.included_count = total;
418 return Ok(output);
419 }
420
421 let budget_config = self.budget_config();
422 let strategy_kind = self.resolve_strategy("get_issue_comments");
423 let result = budget::process_comments(&comments, strategy_kind, &budget_config)?;
424 let chunk_size = result.included_items;
425
426 let (chunk_items, is_chunk_request) = self.slice_for_chunk(&comments, chunk_size);
427 if is_chunk_request {
428 let content = match self.config.format {
429 OutputFormat::Json => serde_json::to_string_pretty(chunk_items)?,
430 OutputFormat::Toon => toon::encode_comments(chunk_items)?,
431 OutputFormat::Mckp => encode_mckp(chunk_items)?,
432 };
433 let mut output = TransformOutput::new(content).with_raw_chars(raw_chars);
434 output.included_count = chunk_items.len();
435 output.total_count = Some(total);
436 return Ok(output);
437 }
438
439 let json_fallback = self.json_fallback(&full_content);
440 let index = page_index::build_comments_index(&comments, result.included_items);
441 self.build_budget_output(
442 result,
443 raw_chars,
444 total,
445 "comments",
446 Some(index),
447 json_fallback,
448 )
449 }
450
451 pub fn transform_discussions(&self, discussions: Vec<Discussion>) -> Result<TransformOutput> {
453 let total = discussions.len();
454 let raw_json = serde_json::to_string(&discussions)?;
455 let raw_chars = raw_json.len();
456
457 let full_content = match self.config.format {
458 OutputFormat::Json => serde_json::to_string_pretty(&discussions)?,
459 OutputFormat::Toon => toon::encode_discussions(&discussions)?,
460 OutputFormat::Mckp => encode_mckp(&discussions)?,
461 };
462
463 if self.config.max_chars == 0 || full_content.len() <= self.config.max_chars {
464 let mut output = TransformOutput::new(full_content).with_raw_chars(raw_chars);
465 output.included_count = total;
466 return Ok(output);
467 }
468
469 let budget_config = self.budget_config();
470 let strategy_kind = self.resolve_strategy("get_merge_request_discussions");
471 let result = budget::process_discussions(&discussions, strategy_kind, &budget_config)?;
472 let chunk_size = result.included_items;
473
474 let (chunk_items, is_chunk_request) = self.slice_for_chunk(&discussions, chunk_size);
475 if is_chunk_request {
476 let content = match self.config.format {
477 OutputFormat::Json => serde_json::to_string_pretty(chunk_items)?,
478 OutputFormat::Toon => toon::encode_discussions(chunk_items)?,
479 OutputFormat::Mckp => encode_mckp(chunk_items)?,
480 };
481 let mut output = TransformOutput::new(content).with_raw_chars(raw_chars);
482 output.included_count = chunk_items.len();
483 output.total_count = Some(total);
484 return Ok(output);
485 }
486
487 let json_fallback = self.json_fallback(&full_content);
488 let index = page_index::build_discussions_index(&discussions, result.included_items);
489 self.build_budget_output(
490 result,
491 raw_chars,
492 total,
493 "discussions",
494 Some(index),
495 json_fallback,
496 )
497 }
498
499 fn json_fallback(&self, content: &str) -> Option<String> {
502 if matches!(self.config.format, OutputFormat::Json) {
503 Some(content.to_string())
504 } else {
505 None
506 }
507 }
508
509 fn slice_for_chunk<'a, T>(&self, items: &'a [T], chunk_size: usize) -> (&'a [T], bool) {
516 match self.config.chunk {
517 Some(n) if n > 1 && chunk_size > 0 => {
518 let offset = (n - 1) * chunk_size;
519 if offset >= items.len() {
520 (&[], true) } else {
522 let end = (offset + chunk_size).min(items.len());
523 (&items[offset..end], true)
524 }
525 }
526 _ => (items, false),
527 }
528 }
529
530 fn budget_config(&self) -> BudgetConfig {
532 BudgetConfig {
533 budget_tokens: estimate_tokens_from_chars(self.config.max_chars),
534 ..Default::default()
535 }
536 }
537
538 fn resolve_strategy(&self, default_tool: &str) -> strategy::TrimStrategyKind {
540 let resolver = StrategyResolver::new();
541 let tool = self.config.tool_name.as_deref().unwrap_or(default_tool);
542 resolver.resolve(tool)
543 }
544
545 fn build_budget_output(
553 &self,
554 result: budget::BudgetResult,
555 raw_chars: usize,
556 total: usize,
557 item_type: &str,
558 index: Option<page_index::PageIndex>,
559 json_fallback: Option<String>,
560 ) -> Result<TransformOutput> {
561 let content = if matches!(self.config.format, OutputFormat::Json) {
563 if let Some(json) = json_fallback {
564 truncation::truncate_string(&json, self.config.max_chars)
565 } else {
566 result.content
567 }
568 } else {
569 result.content
570 };
571
572 let mut output = TransformOutput::new(content).with_raw_chars(raw_chars);
573 output.included_count = result.included_items;
574
575 if result.trimmed {
577 output.truncated = true;
578 output.total_count = Some(total);
579
580 if self.config.include_hints {
581 if let Some(idx) = index {
582 if idx.total_pages > 1 {
583 let hint = format!(
584 "Chunk 1/{}: {} most relevant {} (by priority). {} total items across {} chunks. \
585 Use `chunk: N` parameter to fetch a specific chunk, or request all remaining data.",
586 idx.total_pages,
587 result.included_items,
588 item_type,
589 total,
590 idx.total_pages
591 );
592 output.page_index = Some(idx);
593 output.agent_hint = Some(hint);
594 } else {
595 let remaining = total.saturating_sub(result.included_items);
596 output.agent_hint = Some(format!(
597 "Showing {}/{} {}. {} items trimmed by budget.",
598 result.included_items, total, item_type, remaining
599 ));
600 }
601 } else {
602 let remaining = total.saturating_sub(result.included_items);
603 output.agent_hint = Some(format!(
604 "Showing {}/{} {}. {} items trimmed by budget. Use `chunk: N` parameter to fetch a specific chunk.",
605 result.included_items, total, item_type, remaining
606 ));
607 }
608 }
609 }
610
611 Ok(output)
612 }
613}
614
615impl Default for Pipeline {
616 fn default() -> Self {
617 Self::new()
618 }
619}
620
#[cfg(test)]
mod tests {
    //! Unit tests for the pipeline: format selection, budget trimming, hint
    //! generation and handling of empty inputs.

    use super::*;
    use devboy_core::User;

    // ---- Helpers ---------------------------------------------------------

    /// Pipeline with only the character limit overridden.
    fn capped(max_chars: usize) -> Pipeline {
        Pipeline::with_config(PipelineConfig {
            max_chars,
            ..Default::default()
        })
    }

    /// Pipeline with an explicit output format and character limit.
    fn formatted(format: OutputFormat, max_chars: usize) -> Pipeline {
        Pipeline::with_config(PipelineConfig {
            format,
            max_chars,
            ..Default::default()
        })
    }

    /// Builds one open GitHub issue numbered `i` with the given text fields.
    fn issue_fixture(i: usize, title: String, description: String, labels: &[&str]) -> Issue {
        Issue {
            key: format!("gh#{i}"),
            title,
            description: Some(description),
            state: "open".to_string(),
            source: "github".to_string(),
            priority: None,
            labels: labels.iter().map(|label| label.to_string()).collect(),
            author: Some(User {
                id: "1".to_string(),
                username: "test".to_string(),
                name: None,
                email: None,
                avatar_url: None,
            }),
            assignees: vec![],
            url: Some(format!("https://github.com/test/repo/issues/{i}")),
            created_at: Some("2024-01-01T00:00:00Z".to_string()),
            updated_at: Some("2024-01-02T00:00:00Z".to_string()),
            attachments_count: None,
            parent: None,
            subtasks: vec![],
            custom_fields: std::collections::HashMap::new(),
            ..Default::default()
        }
    }

    /// Twenty-five small issues.
    fn sample_issues() -> Vec<Issue> {
        (1..=25)
            .map(|i| {
                issue_fixture(
                    i,
                    format!("Issue {i}"),
                    format!("Description for issue {i}"),
                    &["bug"],
                )
            })
            .collect()
    }

    /// Five small merge requests.
    fn sample_merge_requests() -> Vec<MergeRequest> {
        (1..=5)
            .map(|i| MergeRequest {
                key: format!("mr#{i}"),
                title: format!("MR {i}"),
                description: Some(format!("MR description {i}")),
                state: "opened".to_string(),
                source: "gitlab".to_string(),
                source_branch: format!("feature-{i}"),
                target_branch: "main".to_string(),
                author: None,
                assignees: vec![],
                reviewers: vec![],
                labels: vec![],
                url: Some(format!(
                    "https://gitlab.com/test/repo/-/merge_requests/{i}"
                )),
                created_at: Some("2024-01-01T00:00:00Z".to_string()),
                updated_at: Some("2024-01-02T00:00:00Z".to_string()),
                draft: false,
            })
            .collect()
    }

    /// Five small file diffs; only the first is flagged as a new file.
    fn sample_diffs() -> Vec<FileDiff> {
        (1..=5)
            .map(|i| FileDiff {
                file_path: format!("src/file_{i}.rs"),
                old_path: None,
                new_file: i == 1,
                deleted_file: false,
                renamed_file: false,
                diff: format!("+added line {i}\n-removed line {i}"),
                additions: Some(1),
                deletions: Some(1),
            })
            .collect()
    }

    /// Five short comments.
    fn sample_comments() -> Vec<Comment> {
        (1..=5)
            .map(|i| Comment {
                id: format!("{i}"),
                body: format!("Comment body {i}"),
                author: None,
                created_at: Some("2024-01-01T00:00:00Z".to_string()),
                updated_at: None,
                position: None,
            })
            .collect()
    }

    /// Five single-comment discussions; even-numbered ones are resolved.
    fn sample_discussions() -> Vec<Discussion> {
        (1..=5)
            .map(|i| Discussion {
                id: format!("{i}"),
                resolved: i % 2 == 0,
                resolved_by: None,
                comments: vec![Comment {
                    id: format!("c{i}"),
                    body: format!("Discussion comment {i}"),
                    author: None,
                    created_at: None,
                    updated_at: None,
                    position: None,
                }],
                position: None,
            })
            .collect()
    }

    // ---- Truncation basics -------------------------------------------------

    #[test]
    fn test_pipeline_truncates_items() {
        let output = capped(200).transform_issues(sample_issues()).unwrap();

        assert!(output.truncated);
        assert_eq!(output.total_count, Some(25));
        assert!(output.included_count < 25);
        assert!(output.agent_hint.is_some());
    }

    #[test]
    fn test_pipeline_no_truncation_when_under_limit() {
        let issues: Vec<Issue> = sample_issues().into_iter().take(5).collect();
        let output = capped(100_000).transform_issues(issues).unwrap();

        assert!(!output.truncated);
        assert!(output.agent_hint.is_none());
    }

    // ---- TOON format -------------------------------------------------------

    #[test]
    fn test_toon_format_issues() {
        let issues: Vec<Issue> = sample_issues().into_iter().take(3).collect();
        let output = formatted(OutputFormat::Toon, 100_000)
            .transform_issues(issues)
            .unwrap();

        assert!(output.content.contains("gh#1"));
        assert!(output.content.contains("Issue 1"));
    }

    #[test]
    fn test_toon_format_merge_requests() {
        let output = formatted(OutputFormat::Toon, 500)
            .transform_merge_requests(sample_merge_requests())
            .unwrap();

        assert!(output.content.contains("mr#1"));
        assert!(output.content.contains("MR 1"));
        assert!(output.truncated);
        assert!(output.included_count < 5);
    }

    #[test]
    fn test_toon_format_diffs() {
        let output = formatted(OutputFormat::Toon, 200)
            .transform_diffs(sample_diffs())
            .unwrap();

        assert!(output.content.contains("src/file_1.rs"));
        assert!(output.truncated);
        assert!(output.included_count < 5);
    }

    #[test]
    fn test_toon_format_comments() {
        let output = formatted(OutputFormat::Toon, 300)
            .transform_comments(sample_comments())
            .unwrap();

        assert!(output.content.contains("Comment body"));
        assert!(output.truncated);
        assert!(output.included_count < 5);
    }

    #[test]
    fn test_toon_format_discussions() {
        let output = formatted(OutputFormat::Toon, 500)
            .transform_discussions(sample_discussions())
            .unwrap();

        assert!(output.content.contains("Discussion comment 1"));
        assert!(output.truncated);
        assert!(output.included_count < 5);
    }

    // ---- JSON format (untrimmed output must round-trip) ----------------------

    #[test]
    fn test_json_format_issues() {
        let issues: Vec<Issue> = sample_issues().into_iter().take(2).collect();
        let output = formatted(OutputFormat::Json, 100_000)
            .transform_issues(issues)
            .unwrap();

        let parsed: Vec<Issue> = serde_json::from_str(&output.content).unwrap();
        assert_eq!(parsed.len(), 2);
    }

    #[test]
    fn test_json_format_merge_requests() {
        let mrs: Vec<MergeRequest> = sample_merge_requests().into_iter().take(2).collect();
        let output = formatted(OutputFormat::Json, 100_000)
            .transform_merge_requests(mrs)
            .unwrap();

        let parsed: Vec<MergeRequest> = serde_json::from_str(&output.content).unwrap();
        assert_eq!(parsed.len(), 2);
    }

    #[test]
    fn test_json_format_diffs() {
        let diffs: Vec<FileDiff> = sample_diffs().into_iter().take(2).collect();
        let output = formatted(OutputFormat::Json, 100_000)
            .transform_diffs(diffs)
            .unwrap();

        let parsed: Vec<FileDiff> = serde_json::from_str(&output.content).unwrap();
        assert_eq!(parsed.len(), 2);
    }

    #[test]
    fn test_json_format_comments() {
        let comments: Vec<Comment> = sample_comments().into_iter().take(2).collect();
        let output = formatted(OutputFormat::Json, 100_000)
            .transform_comments(comments)
            .unwrap();

        let parsed: Vec<Comment> = serde_json::from_str(&output.content).unwrap();
        assert_eq!(parsed.len(), 2);
    }

    #[test]
    fn test_json_format_discussions() {
        let discussions: Vec<Discussion> = sample_discussions().into_iter().take(2).collect();
        let output = formatted(OutputFormat::Json, 100_000)
            .transform_discussions(discussions)
            .unwrap();

        let parsed: Vec<Discussion> = serde_json::from_str(&output.content).unwrap();
        assert_eq!(parsed.len(), 2);
    }

    // ---- TransformOutput ---------------------------------------------------

    #[test]
    fn test_transform_output_to_string_with_hints() {
        let plain = TransformOutput::new("content".to_string());
        assert_eq!(plain.to_string_with_hints(), "content");

        let hinted = TransformOutput::new("content".to_string()).with_truncation(
            10,
            5,
            "hint text".to_string(),
        );
        let rendered = hinted.to_string_with_hints();
        assert!(rendered.contains("content"));
        assert!(rendered.contains("hint text"));
    }

    #[test]
    fn test_transform_output_with_truncation() {
        let output =
            TransformOutput::new("data".into()).with_truncation(100, 10, "90 more items".into());

        assert!(output.truncated);
        assert_eq!(output.total_count, Some(100));
        assert_eq!(output.included_count, 10);
        assert_eq!(output.agent_hint.as_deref(), Some("90 more items"));
    }

    // ---- Configuration -----------------------------------------------------

    #[test]
    fn test_pipeline_config_default_values() {
        let config = PipelineConfig::default();

        assert_eq!(config.max_chars, 100_000);
        assert_eq!(config.max_chars_per_item, 10_000);
        assert_eq!(config.max_description_len, 10_000);
        assert!(matches!(config.format, OutputFormat::Toon));
        assert!(config.include_hints);
    }

    #[test]
    fn test_pipeline_default() {
        let issues: Vec<Issue> = sample_issues().into_iter().take(1).collect();
        let output = Pipeline::default().transform_issues(issues).unwrap();

        assert!(!output.content.is_empty());
    }

    #[test]
    fn test_pipeline_hints_disabled() {
        let pipeline = Pipeline::with_config(PipelineConfig {
            max_chars: 200,
            include_hints: false,
            ..Default::default()
        });

        let output = pipeline.transform_issues(sample_issues()).unwrap();

        assert!(output.included_count < 25);
        assert!(output.truncated);
        // With hints disabled neither the hint nor the page index is attached.
        assert!(output.agent_hint.is_none());
        assert!(output.page_index.is_none());
    }

    // ---- Character limit -----------------------------------------------------

    #[test]
    fn test_char_limit_applied() {
        let output = capped(100).transform_issues(sample_issues()).unwrap();

        assert!(output.truncated);
    }

    #[test]
    fn test_char_limit_triggers_trimming() {
        let issues: Vec<Issue> = sample_issues().into_iter().take(3).collect();
        let output = capped(50).transform_issues(issues).unwrap();

        assert!(output.truncated);
    }

    // ---- Empty inputs ------------------------------------------------------

    #[test]
    fn test_transform_empty_issues() {
        let output = Pipeline::new().transform_issues(vec![]).unwrap();

        assert!(!output.truncated);
        assert_eq!(output.included_count, 0);
    }

    #[test]
    fn test_transform_empty_merge_requests() {
        let output = Pipeline::new().transform_merge_requests(vec![]).unwrap();

        assert!(!output.truncated);
        assert_eq!(output.included_count, 0);
    }

    #[test]
    fn test_transform_empty_diffs() {
        let output = Pipeline::new().transform_diffs(vec![]).unwrap();

        assert!(!output.truncated);
        assert_eq!(output.included_count, 0);
    }

    #[test]
    fn test_transform_empty_comments() {
        let output = Pipeline::new().transform_comments(vec![]).unwrap();

        assert!(!output.truncated);
        assert_eq!(output.included_count, 0);
    }

    #[test]
    fn test_transform_empty_discussions() {
        let output = Pipeline::new().transform_discussions(vec![]).unwrap();

        assert!(!output.truncated);
        assert_eq!(output.included_count, 0);
    }

    // ---- Per-item diff truncation ----------------------------------------------

    #[test]
    fn test_diff_content_truncated_per_item() {
        let pipeline = Pipeline::with_config(PipelineConfig {
            max_chars_per_item: 10,
            max_chars: 100_000,
            ..Default::default()
        });

        let oversized = FileDiff {
            file_path: "big.rs".into(),
            old_path: None,
            new_file: false,
            deleted_file: false,
            renamed_file: false,
            diff: "x".repeat(1000),
            additions: Some(100),
            deletions: Some(0),
        };

        let output = pipeline.transform_diffs(vec![oversized]).unwrap();
        assert!(output.content.len() < 1000);
    }

    // ---- JSON format with budget trimming ----------------------------------------

    #[test]
    fn test_json_format_with_budget_trimming_issues() {
        let output = formatted(OutputFormat::Json, 200)
            .transform_issues(sample_issues())
            .unwrap();

        assert!(output.truncated);
        assert!(output.included_count < 25);
        assert!(!output.content.is_empty());
    }

    #[test]
    fn test_json_format_with_budget_trimming_merge_requests() {
        let output = formatted(OutputFormat::Json, 200)
            .transform_merge_requests(sample_merge_requests())
            .unwrap();

        assert!(output.truncated);
        assert!(!output.content.is_empty());
    }

    #[test]
    fn test_json_format_with_budget_trimming_diffs() {
        let output = formatted(OutputFormat::Json, 100)
            .transform_diffs(sample_diffs())
            .unwrap();

        assert!(output.truncated);
        assert!(!output.content.is_empty());
    }

    #[test]
    fn test_json_format_with_budget_trimming_comments() {
        let output = formatted(OutputFormat::Json, 100)
            .transform_comments(sample_comments())
            .unwrap();

        assert!(output.truncated);
        assert!(!output.content.is_empty());
    }

    #[test]
    fn test_json_format_with_budget_trimming_discussions() {
        let output = formatted(OutputFormat::Json, 100)
            .transform_discussions(sample_discussions())
            .unwrap();

        assert!(output.truncated);
        assert!(!output.content.is_empty());
    }

    // ---- Chunk hints ---------------------------------------------------------

    #[test]
    fn test_pipeline_chunk_index_with_many_issues() {
        let issues: Vec<Issue> = (1..=50)
            .map(|i| {
                issue_fixture(
                    i,
                    format!("Issue {i} with a moderately long title for sizing"),
                    format!(
                        "Description for issue {i} with substantial content to inflate token count significantly beyond budget"
                    ),
                    &["bug", "critical"],
                )
            })
            .collect();

        let pipeline = Pipeline::with_config(PipelineConfig {
            max_chars: 500,
            include_hints: true,
            ..Default::default()
        });

        let output = pipeline.transform_issues(issues).unwrap();

        assert!(output.truncated);
        assert!(output.included_count < 50);

        // A truncated output with hints enabled always carries a hint, so its
        // absence is a failure rather than something to skip over.
        let hint = output
            .agent_hint
            .as_deref()
            .expect("truncated output with hints enabled must carry an agent hint");
        assert!(
            hint.contains("Chunk") || hint.contains("Showing"),
            "Expected chunk or showing hint, got: {}",
            hint
        );
    }

    // ---- Format comparisons --------------------------------------------------

    #[test]
    fn test_toon_smaller_than_json_for_issues() {
        let issues: Vec<Issue> = sample_issues().into_iter().take(10).collect();

        let json_output = formatted(OutputFormat::Json, 1_000_000)
            .transform_issues(issues.clone())
            .unwrap();
        let toon_output = formatted(OutputFormat::Toon, 1_000_000)
            .transform_issues(issues)
            .unwrap();

        assert!(
            toon_output.content.len() < json_output.content.len(),
            "TOON ({}) should be smaller than JSON ({})",
            toon_output.content.len(),
            json_output.content.len()
        );
    }

    #[test]
    fn test_mckp_routes_issues_through_inner_table() {
        let issues: Vec<Issue> = sample_issues().into_iter().take(10).collect();

        let mckp_out = formatted(OutputFormat::Mckp, 1_000_000)
            .transform_issues(issues.clone())
            .unwrap();
        let json_out = formatted(OutputFormat::Json, 1_000_000)
            .transform_issues(issues)
            .unwrap();

        assert!(
            mckp_out.content.len() < json_out.content.len(),
            "MCKP ({}) should be smaller than JSON ({})",
            mckp_out.content.len(),
            json_out.content.len(),
        );

        // Taken by characters so the failure message cannot itself panic on a
        // multi-byte boundary.
        let preview: String = mckp_out.content.chars().take(200).collect();
        for k in ["key", "title", "state", "source"] {
            assert!(
                mckp_out.content.contains(k),
                "MCKP output is missing field `{k}`: {}",
                preview
            );
        }
    }

    #[test]
    fn test_mckp_falls_back_to_pretty_json_on_unstable_keys() {
        let issues: Vec<Issue> = sample_issues().into_iter().take(1).collect();
        let out = formatted(OutputFormat::Mckp, 1_000_000)
            .transform_issues(issues)
            .unwrap();

        assert!(out.content.contains("gh#1"));
    }
}