1#![deny(rustdoc::broken_intra_doc_links)]
26#![deny(rustdoc::private_intra_doc_links)]
27#![deny(rustdoc::invalid_html_tags)]
28pub mod adaptive_config;
29pub mod budget;
30pub mod dedup;
31pub(crate) mod dedup_util;
32pub mod enrichment;
33pub mod layered_pipeline;
34pub mod mckp_router;
35pub mod near_ref;
36pub mod page_index;
37pub mod pagination;
38pub mod projection;
39pub mod round_trip;
40pub mod shape;
41pub mod strategy;
42pub mod telemetry;
43pub mod templates;
44pub mod token_counter;
45pub mod tool_defaults;
46pub mod toon;
47pub mod tree;
48pub mod trim;
49pub mod truncation;
50
51pub use token_counter::{Tokenizer, estimate_tokens, tokens_to_chars};
52pub use truncation::TruncationPlugin;
53
54use devboy_core::{Comment, Discussion, FileDiff, Issue, MergeRequest, Result};
55
56use budget::BudgetConfig;
57use strategy::StrategyResolver;
58
/// Converts a character budget into an approximate token budget.
///
/// Applies a fixed ratio of 3.5 characters per token and rounds up, so any
/// non-zero input yields at least one token.
fn estimate_tokens_from_chars(chars: usize) -> usize {
    const CHARS_PER_TOKEN: f64 = 3.5;
    let tokens = chars as f64 / CHARS_PER_TOKEN;
    tokens.ceil() as usize
}
63
64fn encode_mckp<T: serde::Serialize>(items: &[T]) -> Result<String> {
69 let json = serde_json::to_string_pretty(items)?;
70 let cls = shape::classify(&json);
71 let cfg = adaptive_config::MckpConfig::default();
72 if let Some((_id, body)) = mckp_router::route(&cfg, &json, &cls) {
73 Ok(body)
74 } else {
75 Ok(json)
76 }
77}
78
/// Result of running a collection through the pipeline.
///
/// NOTE: the `*_chars` fields are populated from `String::len()` in this
/// file, i.e. they are UTF-8 byte lengths rather than character counts.
#[derive(Debug, Clone)]
pub struct TransformOutput {
    /// Rendered payload in the configured output format.
    pub content: String,
    /// `true` when the budget trimmed the result (or `with_truncation` was used).
    pub truncated: bool,
    /// Number of items in the original input; `None` when it was not recorded.
    pub total_count: Option<usize>,
    /// Number of items represented in `content`.
    pub included_count: usize,
    /// Optional guidance text appended after the content by `to_string_with_hints`.
    pub agent_hint: Option<String>,
    /// Initialized to `None`; not written elsewhere in this file.
    /// NOTE(review): presumably a continuation cursor set by callers — confirm.
    pub page_cursor: Option<String>,
    /// Optional chunk index, emitted ahead of the content by `to_string_with_hints`.
    pub page_index: Option<page_index::PageIndex>,
    /// Initialized to `None`; not written elsewhere in this file.
    /// NOTE(review): presumably provider-side pagination metadata — confirm.
    pub provider_pagination: Option<devboy_core::Pagination>,
    /// Initialized to `None`; not written elsewhere in this file.
    /// NOTE(review): presumably provider-side sort metadata — confirm.
    pub provider_sort: Option<devboy_core::SortInfo>,
    /// Size of the input before formatting; the pipeline sets it to the length
    /// of the compact JSON serialization.
    pub raw_chars: usize,
    /// Length of `content` at construction time.
    pub output_chars: usize,
    /// Initialized to `0`; not updated in this file.
    /// NOTE(review): presumably the size before trimming — confirm against callers.
    pub pre_trim_chars: usize,
}
110
111impl TransformOutput {
112 pub fn new(content: String) -> Self {
114 let output_chars = content.len();
115 Self {
116 content,
117 truncated: false,
118 total_count: None,
119 included_count: 0,
120 agent_hint: None,
121 page_cursor: None,
122 page_index: None,
123 provider_pagination: None,
124 provider_sort: None,
125 raw_chars: 0,
126 output_chars,
127 pre_trim_chars: 0,
128 }
129 }
130
131 pub fn with_raw_chars(mut self, raw_chars: usize) -> Self {
133 self.raw_chars = raw_chars;
134 self
135 }
136
137 pub fn with_truncation(mut self, total: usize, included: usize, hint: String) -> Self {
139 self.truncated = true;
140 self.total_count = Some(total);
141 self.included_count = included;
142 self.agent_hint = Some(hint);
143 self
144 }
145
146 pub fn to_string_with_hints(&self) -> String {
148 let mut parts = Vec::new();
149
150 if let Some(index) = &self.page_index {
152 parts.push(index.to_toon());
153 }
154
155 parts.push(self.content.clone());
157
158 if let Some(hint) = &self.agent_hint {
160 parts.push(hint.clone());
161 }
162
163 parts.join("\n\n")
164 }
165}
166
/// Settings that control how a `Pipeline` formats and trims its input.
#[derive(Debug, Clone)]
pub struct PipelineConfig {
    /// Size budget for the rendered output, compared against `String::len()`.
    /// `0` disables the limit.
    pub max_chars: usize,
    /// Per-item limit; in this file it is applied to each diff's text in
    /// `transform_diffs`.
    pub max_chars_per_item: usize,
    /// Not read in this file.
    /// NOTE(review): presumably a cap on description length used by other modules — confirm.
    pub max_description_len: usize,
    /// Output encoding to produce.
    pub format: OutputFormat,
    /// When `false`, trimmed outputs carry neither an agent hint nor a page index.
    pub include_hints: bool,
    /// Not read in this file.
    /// NOTE(review): presumably an incoming pagination cursor — confirm.
    pub page_cursor: Option<String>,
    /// Overrides the per-transform default tool name used to resolve the trim strategy.
    pub tool_name: Option<String>,
    /// 1-based chunk to return. `None`, `0` and `1` all produce the regular
    /// budgeted first chunk.
    pub chunk: Option<usize>,
}
188
189impl Default for PipelineConfig {
190 fn default() -> Self {
191 Self {
192 max_chars: 100_000,
193 max_chars_per_item: 10_000,
194 max_description_len: 10_000,
195 format: OutputFormat::Toon,
196 include_hints: true,
197 page_cursor: None,
198 tool_name: None,
199 chunk: None,
200 }
201 }
202}
203
/// Encoding used for pipeline output.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OutputFormat {
    /// Encoded through the `toon` module.
    Toon,
    /// Pretty-printed JSON via `serde_json`.
    Json,
    /// Pretty JSON offered to `mckp_router::route`; falls back to the pretty
    /// JSON when the router returns nothing.
    Mckp,
}
221
222pub struct Pipeline {
224 config: PipelineConfig,
225}
226
227impl Pipeline {
228 pub fn new() -> Self {
230 Self {
231 config: PipelineConfig::default(),
232 }
233 }
234
235 pub fn with_config(config: PipelineConfig) -> Self {
237 Self { config }
238 }
239
240 pub fn transform_issues(&self, issues: Vec<Issue>) -> Result<TransformOutput> {
242 let total = issues.len();
243 let raw_json = serde_json::to_string(&issues)?;
244 let raw_chars = raw_json.len();
245
246 let full_content = match self.config.format {
248 OutputFormat::Json => serde_json::to_string_pretty(&issues)?,
249 OutputFormat::Toon => toon::encode_issues(&issues, toon::TrimLevel::Full)?,
250 OutputFormat::Mckp => encode_mckp(&issues)?,
251 };
252
253 if self.config.max_chars == 0 || full_content.len() <= self.config.max_chars {
254 let mut output = TransformOutput::new(full_content).with_raw_chars(raw_chars);
255 output.included_count = total;
256 return Ok(output);
257 }
258
259 let budget_config = self.budget_config();
261 let strategy_kind = self.resolve_strategy("get_issues");
262 let result = budget::process_issues(&issues, strategy_kind, &budget_config)?;
263 let chunk_size = result.included_items;
264
265 let (chunk_items, is_chunk_request) = self.slice_for_chunk(&issues, chunk_size);
267 if is_chunk_request {
268 let content = match self.config.format {
269 OutputFormat::Json => serde_json::to_string_pretty(chunk_items)?,
270 OutputFormat::Toon => toon::encode_issues(chunk_items, toon::TrimLevel::Full)?,
271 OutputFormat::Mckp => encode_mckp(chunk_items)?,
272 };
273 let mut output = TransformOutput::new(content).with_raw_chars(raw_chars);
274 output.included_count = chunk_items.len();
275 output.total_count = Some(total);
276 return Ok(output);
277 }
278
279 let json_fallback = self.json_fallback(&full_content);
281 let index = page_index::build_issues_index(&issues, result.included_items);
282 self.build_budget_output(
283 result,
284 raw_chars,
285 total,
286 "issues",
287 Some(index),
288 json_fallback,
289 )
290 }
291
292 pub fn transform_merge_requests(&self, mrs: Vec<MergeRequest>) -> Result<TransformOutput> {
294 let total = mrs.len();
295 let raw_json = serde_json::to_string(&mrs)?;
296 let raw_chars = raw_json.len();
297
298 let full_content = match self.config.format {
299 OutputFormat::Json => serde_json::to_string_pretty(&mrs)?,
300 OutputFormat::Toon => toon::encode_merge_requests(&mrs, toon::TrimLevel::Full)?,
301 OutputFormat::Mckp => encode_mckp(&mrs)?,
302 };
303
304 if self.config.max_chars == 0 || full_content.len() <= self.config.max_chars {
305 let mut output = TransformOutput::new(full_content).with_raw_chars(raw_chars);
306 output.included_count = total;
307 return Ok(output);
308 }
309
310 let budget_config = self.budget_config();
311 let strategy_kind = self.resolve_strategy("get_merge_requests");
312 let result = budget::process_merge_requests(&mrs, strategy_kind, &budget_config)?;
313 let chunk_size = result.included_items;
314
315 let (chunk_items, is_chunk_request) = self.slice_for_chunk(&mrs, chunk_size);
316 if is_chunk_request {
317 let content = match self.config.format {
318 OutputFormat::Json => serde_json::to_string_pretty(chunk_items)?,
319 OutputFormat::Toon => {
320 toon::encode_merge_requests(chunk_items, toon::TrimLevel::Full)?
321 }
322 OutputFormat::Mckp => encode_mckp(chunk_items)?,
323 };
324 let mut output = TransformOutput::new(content).with_raw_chars(raw_chars);
325 output.included_count = chunk_items.len();
326 output.total_count = Some(total);
327 return Ok(output);
328 }
329
330 let json_fallback = self.json_fallback(&full_content);
331 let index = page_index::build_merge_requests_index(&mrs, result.included_items);
332 self.build_budget_output(
333 result,
334 raw_chars,
335 total,
336 "merge_requests",
337 Some(index),
338 json_fallback,
339 )
340 }
341
342 pub fn transform_diffs(&self, diffs: Vec<FileDiff>) -> Result<TransformOutput> {
347 let total = diffs.len();
348
349 let diffs: Vec<FileDiff> = diffs
351 .into_iter()
352 .map(|mut d| {
353 d.diff = truncation::truncate_string(&d.diff, self.config.max_chars_per_item);
354 d
355 })
356 .collect();
357
358 let raw_json = serde_json::to_string(&diffs)?;
359 let raw_chars = raw_json.len();
360
361 let full_content = match self.config.format {
362 OutputFormat::Json => serde_json::to_string_pretty(&diffs)?,
363 OutputFormat::Toon => toon::encode_diffs(&diffs)?,
364 OutputFormat::Mckp => encode_mckp(&diffs)?,
365 };
366
367 if self.config.max_chars == 0 || full_content.len() <= self.config.max_chars {
368 let mut output = TransformOutput::new(full_content).with_raw_chars(raw_chars);
369 output.included_count = total;
370 return Ok(output);
371 }
372
373 let budget_config = self.budget_config();
374 let strategy_kind = self.resolve_strategy("get_merge_request_diffs");
375 let result = budget::process_diffs(&diffs, strategy_kind, &budget_config)?;
376 let chunk_size = result.included_items;
377
378 let (chunk_items, is_chunk_request) = self.slice_for_chunk(&diffs, chunk_size);
379 if is_chunk_request {
380 let content = match self.config.format {
381 OutputFormat::Json => serde_json::to_string_pretty(chunk_items)?,
382 OutputFormat::Toon => toon::encode_diffs(chunk_items)?,
383 OutputFormat::Mckp => encode_mckp(chunk_items)?,
384 };
385 let mut output = TransformOutput::new(content).with_raw_chars(raw_chars);
386 output.included_count = chunk_items.len();
387 output.total_count = Some(total);
388 return Ok(output);
389 }
390
391 let json_fallback = self.json_fallback(&full_content);
392 let index = page_index::build_diffs_index(&diffs, result.included_items);
393 self.build_budget_output(
394 result,
395 raw_chars,
396 total,
397 "diffs",
398 Some(index),
399 json_fallback,
400 )
401 }
402
403 pub fn transform_comments(&self, comments: Vec<Comment>) -> Result<TransformOutput> {
405 let total = comments.len();
406 let raw_json = serde_json::to_string(&comments)?;
407 let raw_chars = raw_json.len();
408
409 let full_content = match self.config.format {
410 OutputFormat::Json => serde_json::to_string_pretty(&comments)?,
411 OutputFormat::Toon => toon::encode_comments(&comments)?,
412 OutputFormat::Mckp => encode_mckp(&comments)?,
413 };
414
415 if self.config.max_chars == 0 || full_content.len() <= self.config.max_chars {
416 let mut output = TransformOutput::new(full_content).with_raw_chars(raw_chars);
417 output.included_count = total;
418 return Ok(output);
419 }
420
421 let budget_config = self.budget_config();
422 let strategy_kind = self.resolve_strategy("get_issue_comments");
423 let result = budget::process_comments(&comments, strategy_kind, &budget_config)?;
424 let chunk_size = result.included_items;
425
426 let (chunk_items, is_chunk_request) = self.slice_for_chunk(&comments, chunk_size);
427 if is_chunk_request {
428 let content = match self.config.format {
429 OutputFormat::Json => serde_json::to_string_pretty(chunk_items)?,
430 OutputFormat::Toon => toon::encode_comments(chunk_items)?,
431 OutputFormat::Mckp => encode_mckp(chunk_items)?,
432 };
433 let mut output = TransformOutput::new(content).with_raw_chars(raw_chars);
434 output.included_count = chunk_items.len();
435 output.total_count = Some(total);
436 return Ok(output);
437 }
438
439 let json_fallback = self.json_fallback(&full_content);
440 let index = page_index::build_comments_index(&comments, result.included_items);
441 self.build_budget_output(
442 result,
443 raw_chars,
444 total,
445 "comments",
446 Some(index),
447 json_fallback,
448 )
449 }
450
451 pub fn transform_discussions(&self, discussions: Vec<Discussion>) -> Result<TransformOutput> {
453 let total = discussions.len();
454 let raw_json = serde_json::to_string(&discussions)?;
455 let raw_chars = raw_json.len();
456
457 let full_content = match self.config.format {
458 OutputFormat::Json => serde_json::to_string_pretty(&discussions)?,
459 OutputFormat::Toon => toon::encode_discussions(&discussions)?,
460 OutputFormat::Mckp => encode_mckp(&discussions)?,
461 };
462
463 if self.config.max_chars == 0 || full_content.len() <= self.config.max_chars {
464 let mut output = TransformOutput::new(full_content).with_raw_chars(raw_chars);
465 output.included_count = total;
466 return Ok(output);
467 }
468
469 let budget_config = self.budget_config();
470 let strategy_kind = self.resolve_strategy("get_merge_request_discussions");
471 let result = budget::process_discussions(&discussions, strategy_kind, &budget_config)?;
472 let chunk_size = result.included_items;
473
474 let (chunk_items, is_chunk_request) = self.slice_for_chunk(&discussions, chunk_size);
475 if is_chunk_request {
476 let content = match self.config.format {
477 OutputFormat::Json => serde_json::to_string_pretty(chunk_items)?,
478 OutputFormat::Toon => toon::encode_discussions(chunk_items)?,
479 OutputFormat::Mckp => encode_mckp(chunk_items)?,
480 };
481 let mut output = TransformOutput::new(content).with_raw_chars(raw_chars);
482 output.included_count = chunk_items.len();
483 output.total_count = Some(total);
484 return Ok(output);
485 }
486
487 let json_fallback = self.json_fallback(&full_content);
488 let index = page_index::build_discussions_index(&discussions, result.included_items);
489 self.build_budget_output(
490 result,
491 raw_chars,
492 total,
493 "discussions",
494 Some(index),
495 json_fallback,
496 )
497 }
498
499 fn json_fallback(&self, content: &str) -> Option<String> {
502 if matches!(self.config.format, OutputFormat::Json) {
503 Some(content.to_string())
504 } else {
505 None
506 }
507 }
508
509 fn slice_for_chunk<'a, T>(&self, items: &'a [T], chunk_size: usize) -> (&'a [T], bool) {
516 match self.config.chunk {
517 Some(n) if n > 1 && chunk_size > 0 => {
518 let offset = (n - 1) * chunk_size;
519 if offset >= items.len() {
520 (&[], true) } else {
522 let end = (offset + chunk_size).min(items.len());
523 (&items[offset..end], true)
524 }
525 }
526 _ => (items, false),
527 }
528 }
529
530 fn budget_config(&self) -> BudgetConfig {
532 BudgetConfig {
533 budget_tokens: estimate_tokens_from_chars(self.config.max_chars),
534 ..Default::default()
535 }
536 }
537
538 fn resolve_strategy(&self, default_tool: &str) -> strategy::TrimStrategyKind {
540 let resolver = StrategyResolver::new();
541 let tool = self.config.tool_name.as_deref().unwrap_or(default_tool);
542 resolver.resolve(tool)
543 }
544
545 fn build_budget_output(
553 &self,
554 result: budget::BudgetResult,
555 raw_chars: usize,
556 total: usize,
557 item_type: &str,
558 index: Option<page_index::PageIndex>,
559 json_fallback: Option<String>,
560 ) -> Result<TransformOutput> {
561 let content = if matches!(self.config.format, OutputFormat::Json) {
563 if let Some(json) = json_fallback {
564 truncation::truncate_string(&json, self.config.max_chars)
565 } else {
566 result.content
567 }
568 } else {
569 result.content
570 };
571
572 let mut output = TransformOutput::new(content).with_raw_chars(raw_chars);
573 output.included_count = result.included_items;
574
575 if result.trimmed {
577 output.truncated = true;
578 output.total_count = Some(total);
579
580 if self.config.include_hints {
581 if let Some(idx) = index {
582 if idx.total_pages > 1 {
583 let hint = format!(
584 "Chunk 1/{}: {} most relevant {} (by priority). {} total items across {} chunks. \
585 Use `chunk: N` parameter to fetch a specific chunk, or request all remaining data.",
586 idx.total_pages,
587 result.included_items,
588 item_type,
589 total,
590 idx.total_pages
591 );
592 output.page_index = Some(idx);
593 output.agent_hint = Some(hint);
594 } else {
595 let remaining = total.saturating_sub(result.included_items);
596 output.agent_hint = Some(format!(
597 "Showing {}/{} {}. {} items trimmed by budget.",
598 result.included_items, total, item_type, remaining
599 ));
600 }
601 } else {
602 let remaining = total.saturating_sub(result.included_items);
603 output.agent_hint = Some(format!(
604 "Showing {}/{} {}. {} items trimmed by budget. Use `chunk: N` parameter to fetch a specific chunk.",
605 result.included_items, total, item_type, remaining
606 ));
607 }
608 }
609 }
610
611 Ok(output)
612 }
613}
614
615impl Default for Pipeline {
616 fn default() -> Self {
617 Self::new()
618 }
619}
620
621#[cfg(test)]
622mod tests {
623 use super::*;
624 use devboy_core::User;
625
626 fn sample_issues() -> Vec<Issue> {
627 (1..=25)
628 .map(|i| Issue {
629 key: format!("gh#{}", i),
630 title: format!("Issue {}", i),
631 description: Some(format!("Description for issue {}", i)),
632 state: "open".to_string(),
633 source: "github".to_string(),
634 priority: None,
635 labels: vec!["bug".to_string()],
636 author: Some(User {
637 id: "1".to_string(),
638 username: "test".to_string(),
639 name: None,
640 email: None,
641 avatar_url: None,
642 }),
643 assignees: vec![],
644 url: Some(format!("https://github.com/test/repo/issues/{}", i)),
645 created_at: Some("2024-01-01T00:00:00Z".to_string()),
646 updated_at: Some("2024-01-02T00:00:00Z".to_string()),
647 attachments_count: None,
648 parent: None,
649 subtasks: vec![],
650 })
651 .collect()
652 }
653
654 fn sample_merge_requests() -> Vec<MergeRequest> {
655 (1..=5)
656 .map(|i| MergeRequest {
657 key: format!("mr#{}", i),
658 title: format!("MR {}", i),
659 description: Some(format!("MR description {}", i)),
660 state: "opened".to_string(),
661 source: "gitlab".to_string(),
662 source_branch: format!("feature-{}", i),
663 target_branch: "main".to_string(),
664 author: None,
665 assignees: vec![],
666 reviewers: vec![],
667 labels: vec![],
668 url: Some(format!(
669 "https://gitlab.com/test/repo/-/merge_requests/{}",
670 i
671 )),
672 created_at: Some("2024-01-01T00:00:00Z".to_string()),
673 updated_at: Some("2024-01-02T00:00:00Z".to_string()),
674 draft: false,
675 })
676 .collect()
677 }
678
679 fn sample_diffs() -> Vec<FileDiff> {
680 (1..=5)
681 .map(|i| FileDiff {
682 file_path: format!("src/file_{}.rs", i),
683 old_path: None,
684 new_file: i == 1,
685 deleted_file: false,
686 renamed_file: false,
687 diff: format!("+added line {}\n-removed line {}", i, i),
688 additions: Some(1),
689 deletions: Some(1),
690 })
691 .collect()
692 }
693
694 fn sample_comments() -> Vec<Comment> {
695 (1..=5)
696 .map(|i| Comment {
697 id: format!("{}", i),
698 body: format!("Comment body {}", i),
699 author: None,
700 created_at: Some("2024-01-01T00:00:00Z".to_string()),
701 updated_at: None,
702 position: None,
703 })
704 .collect()
705 }
706
707 fn sample_discussions() -> Vec<Discussion> {
708 (1..=5)
709 .map(|i| Discussion {
710 id: format!("{}", i),
711 resolved: i % 2 == 0,
712 resolved_by: None,
713 comments: vec![Comment {
714 id: format!("c{}", i),
715 body: format!("Discussion comment {}", i),
716 author: None,
717 created_at: None,
718 updated_at: None,
719 position: None,
720 }],
721 position: None,
722 })
723 .collect()
724 }
725
726 #[test]
729 fn test_pipeline_truncates_items() {
730 let pipeline = Pipeline::with_config(PipelineConfig {
732 max_chars: 200,
733 ..Default::default()
734 });
735
736 let issues = sample_issues();
737 let output = pipeline.transform_issues(issues).unwrap();
738
739 assert!(output.truncated);
740 assert_eq!(output.total_count, Some(25));
741 assert!(output.included_count < 25);
742 assert!(output.agent_hint.is_some());
743 }
744
745 #[test]
746 fn test_pipeline_no_truncation_when_under_limit() {
747 let pipeline = Pipeline::with_config(PipelineConfig {
748 max_chars: 100_000,
749 ..Default::default()
750 });
751
752 let issues: Vec<Issue> = sample_issues().into_iter().take(5).collect();
753 let output = pipeline.transform_issues(issues).unwrap();
754
755 assert!(!output.truncated);
756 assert!(output.agent_hint.is_none());
757 }
758
759 #[test]
762 fn test_toon_format_issues() {
763 let pipeline = Pipeline::with_config(PipelineConfig {
764 format: OutputFormat::Toon,
765 max_chars: 100_000,
766 ..Default::default()
767 });
768
769 let issues: Vec<Issue> = sample_issues().into_iter().take(3).collect();
770 let output = pipeline.transform_issues(issues).unwrap();
771
772 assert!(output.content.contains("gh#1"));
773 assert!(output.content.contains("Issue 1"));
774 }
775
776 #[test]
777 fn test_toon_format_merge_requests() {
778 let pipeline = Pipeline::with_config(PipelineConfig {
780 format: OutputFormat::Toon,
781 max_chars: 500,
782 ..Default::default()
783 });
784
785 let mrs = sample_merge_requests();
786 let output = pipeline.transform_merge_requests(mrs).unwrap();
787
788 assert!(output.content.contains("mr#1"));
789 assert!(output.content.contains("MR 1"));
790 assert!(output.truncated);
791 assert!(output.included_count < 5);
792 }
793
794 #[test]
795 fn test_toon_format_diffs() {
796 let pipeline = Pipeline::with_config(PipelineConfig {
798 format: OutputFormat::Toon,
799 max_chars: 200,
800 ..Default::default()
801 });
802
803 let diffs = sample_diffs();
804 let output = pipeline.transform_diffs(diffs).unwrap();
805
806 assert!(output.content.contains("src/file_1.rs"));
807 assert!(output.truncated);
808 assert!(output.included_count < 5);
809 }
810
811 #[test]
812 fn test_toon_format_comments() {
813 let pipeline = Pipeline::with_config(PipelineConfig {
816 format: OutputFormat::Toon,
817 max_chars: 300,
818 ..Default::default()
819 });
820
821 let comments = sample_comments();
822 let output = pipeline.transform_comments(comments).unwrap();
823
824 assert!(output.content.contains("Comment body"));
826 assert!(output.truncated);
827 assert!(output.included_count < 5);
828 }
829
830 #[test]
831 fn test_toon_format_discussions() {
832 let pipeline = Pipeline::with_config(PipelineConfig {
834 format: OutputFormat::Toon,
835 max_chars: 500,
836 ..Default::default()
837 });
838
839 let discussions = sample_discussions();
840 let output = pipeline.transform_discussions(discussions).unwrap();
841
842 assert!(output.content.contains("Discussion comment 1"));
843 assert!(output.truncated);
844 assert!(output.included_count < 5);
845 }
846
847 #[test]
850 fn test_json_format_issues() {
851 let pipeline = Pipeline::with_config(PipelineConfig {
852 format: OutputFormat::Json,
853 max_chars: 100_000,
854 ..Default::default()
855 });
856
857 let issues: Vec<Issue> = sample_issues().into_iter().take(2).collect();
858 let output = pipeline.transform_issues(issues).unwrap();
859
860 let parsed: Vec<Issue> = serde_json::from_str(&output.content).unwrap();
861 assert_eq!(parsed.len(), 2);
862 }
863
864 #[test]
865 fn test_json_format_merge_requests() {
866 let pipeline = Pipeline::with_config(PipelineConfig {
867 format: OutputFormat::Json,
868 max_chars: 100_000,
869 ..Default::default()
870 });
871
872 let mrs: Vec<MergeRequest> = sample_merge_requests().into_iter().take(2).collect();
873 let output = pipeline.transform_merge_requests(mrs).unwrap();
874
875 let parsed: Vec<MergeRequest> = serde_json::from_str(&output.content).unwrap();
876 assert_eq!(parsed.len(), 2);
877 }
878
879 #[test]
880 fn test_json_format_diffs() {
881 let pipeline = Pipeline::with_config(PipelineConfig {
882 format: OutputFormat::Json,
883 max_chars: 100_000,
884 ..Default::default()
885 });
886
887 let diffs: Vec<FileDiff> = sample_diffs().into_iter().take(2).collect();
888 let output = pipeline.transform_diffs(diffs).unwrap();
889
890 let parsed: Vec<FileDiff> = serde_json::from_str(&output.content).unwrap();
891 assert_eq!(parsed.len(), 2);
892 }
893
894 #[test]
895 fn test_json_format_comments() {
896 let pipeline = Pipeline::with_config(PipelineConfig {
897 format: OutputFormat::Json,
898 max_chars: 100_000,
899 ..Default::default()
900 });
901
902 let comments: Vec<Comment> = sample_comments().into_iter().take(2).collect();
903 let output = pipeline.transform_comments(comments).unwrap();
904
905 let parsed: Vec<Comment> = serde_json::from_str(&output.content).unwrap();
906 assert_eq!(parsed.len(), 2);
907 }
908
909 #[test]
910 fn test_json_format_discussions() {
911 let pipeline = Pipeline::with_config(PipelineConfig {
912 format: OutputFormat::Json,
913 max_chars: 100_000,
914 ..Default::default()
915 });
916
917 let discussions: Vec<Discussion> = sample_discussions().into_iter().take(2).collect();
918 let output = pipeline.transform_discussions(discussions).unwrap();
919
920 let parsed: Vec<Discussion> = serde_json::from_str(&output.content).unwrap();
921 assert_eq!(parsed.len(), 2);
922 }
923
924 #[test]
927 fn test_transform_output_to_string_with_hints() {
928 let output = TransformOutput::new("content".to_string());
929 assert_eq!(output.to_string_with_hints(), "content");
930
931 let output = TransformOutput::new("content".to_string()).with_truncation(
932 10,
933 5,
934 "hint text".to_string(),
935 );
936 assert!(output.to_string_with_hints().contains("content"));
937 assert!(output.to_string_with_hints().contains("hint text"));
938 }
939
940 #[test]
941 fn test_transform_output_with_truncation() {
942 let output =
943 TransformOutput::new("data".into()).with_truncation(100, 10, "90 more items".into());
944 assert!(output.truncated);
945 assert_eq!(output.total_count, Some(100));
946 assert_eq!(output.included_count, 10);
947 assert_eq!(output.agent_hint.as_deref(), Some("90 more items"));
948 }
949
950 #[test]
953 fn test_pipeline_config_default_values() {
954 let config = PipelineConfig::default();
955 assert_eq!(config.max_chars, 100_000);
956 assert_eq!(config.max_chars_per_item, 10_000);
957 assert_eq!(config.max_description_len, 10_000);
958 assert!(matches!(config.format, OutputFormat::Toon));
959 assert!(config.include_hints);
960 }
961
962 #[test]
963 fn test_pipeline_default() {
964 let pipeline = Pipeline::default();
965 let issues: Vec<Issue> = sample_issues().into_iter().take(1).collect();
966 let output = pipeline.transform_issues(issues).unwrap();
967 assert!(!output.content.is_empty());
968 }
969
970 #[test]
971 fn test_pipeline_hints_disabled() {
972 let pipeline = Pipeline::with_config(PipelineConfig {
974 max_chars: 200,
975 include_hints: false,
976 ..Default::default()
977 });
978
979 let issues = sample_issues();
980 let output = pipeline.transform_issues(issues).unwrap();
981
982 assert!(output.included_count < 25);
983 assert!(output.truncated);
985 assert!(output.agent_hint.is_none());
987 assert!(output.page_index.is_none());
988 }
989
990 #[test]
993 fn test_char_limit_applied() {
994 let pipeline = Pipeline::with_config(PipelineConfig {
995 max_chars: 100,
996 ..Default::default()
997 });
998
999 let issues = sample_issues();
1000 let output = pipeline.transform_issues(issues).unwrap();
1001
1002 assert!(output.truncated);
1003 }
1004
1005 #[test]
1006 fn test_char_limit_triggers_trimming() {
1007 let pipeline = Pipeline::with_config(PipelineConfig {
1008 max_chars: 50,
1009 ..Default::default()
1010 });
1011
1012 let issues: Vec<Issue> = sample_issues().into_iter().take(3).collect();
1013 let output = pipeline.transform_issues(issues).unwrap();
1014 assert!(output.truncated);
1015 }
1016
1017 #[test]
1020 fn test_transform_empty_issues() {
1021 let pipeline = Pipeline::new();
1022 let output = pipeline.transform_issues(vec![]).unwrap();
1023 assert!(!output.truncated);
1024 assert_eq!(output.included_count, 0);
1025 }
1026
1027 #[test]
1028 fn test_transform_empty_merge_requests() {
1029 let pipeline = Pipeline::new();
1030 let output = pipeline.transform_merge_requests(vec![]).unwrap();
1031 assert!(!output.truncated);
1032 assert_eq!(output.included_count, 0);
1033 }
1034
1035 #[test]
1036 fn test_transform_empty_diffs() {
1037 let pipeline = Pipeline::new();
1038 let output = pipeline.transform_diffs(vec![]).unwrap();
1039 assert!(!output.truncated);
1040 assert_eq!(output.included_count, 0);
1041 }
1042
1043 #[test]
1044 fn test_transform_empty_comments() {
1045 let pipeline = Pipeline::new();
1046 let output = pipeline.transform_comments(vec![]).unwrap();
1047 assert!(!output.truncated);
1048 assert_eq!(output.included_count, 0);
1049 }
1050
1051 #[test]
1052 fn test_transform_empty_discussions() {
1053 let pipeline = Pipeline::new();
1054 let output = pipeline.transform_discussions(vec![]).unwrap();
1055 assert!(!output.truncated);
1056 assert_eq!(output.included_count, 0);
1057 }
1058
1059 #[test]
1062 fn test_diff_content_truncated_per_item() {
1063 let pipeline = Pipeline::with_config(PipelineConfig {
1064 max_chars_per_item: 10,
1065 max_chars: 100_000,
1066 ..Default::default()
1067 });
1068
1069 let diffs = vec![FileDiff {
1070 file_path: "big.rs".into(),
1071 old_path: None,
1072 new_file: false,
1073 deleted_file: false,
1074 renamed_file: false,
1075 diff: "x".repeat(1000),
1076 additions: Some(100),
1077 deletions: Some(0),
1078 }];
1079
1080 let output = pipeline.transform_diffs(diffs).unwrap();
1081 assert!(output.content.len() < 1000);
1082 }
1083
1084 #[test]
1089 fn test_json_format_with_budget_trimming_issues() {
1090 let pipeline = Pipeline::with_config(PipelineConfig {
1091 format: OutputFormat::Json,
1092 max_chars: 200,
1093 ..Default::default()
1094 });
1095
1096 let issues = sample_issues();
1097 let output = pipeline.transform_issues(issues).unwrap();
1098
1099 assert!(output.truncated);
1100 assert!(output.included_count < 25);
1101 assert!(!output.content.is_empty());
1103 }
1104
1105 #[test]
1106 fn test_json_format_with_budget_trimming_merge_requests() {
1107 let pipeline = Pipeline::with_config(PipelineConfig {
1108 format: OutputFormat::Json,
1109 max_chars: 200,
1110 ..Default::default()
1111 });
1112
1113 let mrs = sample_merge_requests();
1114 let output = pipeline.transform_merge_requests(mrs).unwrap();
1115
1116 assert!(output.truncated);
1117 assert!(!output.content.is_empty());
1118 }
1119
1120 #[test]
1121 fn test_json_format_with_budget_trimming_diffs() {
1122 let pipeline = Pipeline::with_config(PipelineConfig {
1123 format: OutputFormat::Json,
1124 max_chars: 100,
1125 ..Default::default()
1126 });
1127
1128 let diffs = sample_diffs();
1129 let output = pipeline.transform_diffs(diffs).unwrap();
1130
1131 assert!(output.truncated);
1132 assert!(!output.content.is_empty());
1133 }
1134
1135 #[test]
1136 fn test_json_format_with_budget_trimming_comments() {
1137 let pipeline = Pipeline::with_config(PipelineConfig {
1138 format: OutputFormat::Json,
1139 max_chars: 100,
1140 ..Default::default()
1141 });
1142
1143 let comments = sample_comments();
1144 let output = pipeline.transform_comments(comments).unwrap();
1145
1146 assert!(output.truncated);
1147 assert!(!output.content.is_empty());
1148 }
1149
1150 #[test]
1151 fn test_json_format_with_budget_trimming_discussions() {
1152 let pipeline = Pipeline::with_config(PipelineConfig {
1153 format: OutputFormat::Json,
1154 max_chars: 100,
1155 ..Default::default()
1156 });
1157
1158 let discussions = sample_discussions();
1159 let output = pipeline.transform_discussions(discussions).unwrap();
1160
1161 assert!(output.truncated);
1162 assert!(!output.content.is_empty());
1163 }
1164
1165 #[test]
1168 fn test_pipeline_chunk_index_with_many_issues() {
1169 let issues: Vec<Issue> = (1..=50)
1171 .map(|i| Issue {
1172 key: format!("gh#{}", i),
1173 title: format!("Issue {} with a moderately long title for sizing", i),
1174 description: Some(format!(
1175 "Description for issue {} with substantial content to inflate token count significantly beyond budget",
1176 i
1177 )),
1178 state: "open".to_string(),
1179 source: "github".to_string(),
1180 priority: None,
1181 labels: vec!["bug".to_string(), "critical".to_string()],
1182 author: Some(User {
1183 id: "1".to_string(),
1184 username: "test".to_string(),
1185 name: None,
1186 email: None,
1187 avatar_url: None,
1188 }),
1189 assignees: vec![],
1190 url: Some(format!("https://github.com/test/repo/issues/{}", i)),
1191 created_at: Some("2024-01-01T00:00:00Z".to_string()),
1192 updated_at: Some("2024-01-02T00:00:00Z".to_string()),
1193 attachments_count: None,
1194 parent: None,
1195 subtasks: vec![],
1196 })
1197 .collect();
1198
1199 let pipeline = Pipeline::with_config(PipelineConfig {
1200 max_chars: 500,
1201 include_hints: true,
1202 ..Default::default()
1203 });
1204
1205 let output = pipeline.transform_issues(issues).unwrap();
1206
1207 assert!(output.truncated);
1208 assert!(output.included_count < 50);
1209 if let Some(ref hint) = output.agent_hint {
1211 assert!(
1212 hint.contains("Chunk") || hint.contains("Showing"),
1213 "Expected chunk or showing hint, got: {}",
1214 hint
1215 );
1216 }
1217 }
1218
1219 #[test]
1220 fn test_toon_smaller_than_json_for_issues() {
1221 let issues: Vec<Issue> = sample_issues().into_iter().take(10).collect();
1222
1223 let json_pipeline = Pipeline::with_config(PipelineConfig {
1224 format: OutputFormat::Json,
1225 max_chars: 1_000_000,
1226 ..Default::default()
1227 });
1228 let toon_pipeline = Pipeline::with_config(PipelineConfig {
1229 format: OutputFormat::Toon,
1230 max_chars: 1_000_000,
1231 ..Default::default()
1232 });
1233
1234 let json_output = json_pipeline.transform_issues(issues.clone()).unwrap();
1235 let toon_output = toon_pipeline.transform_issues(issues).unwrap();
1236
1237 assert!(
1238 toon_output.content.len() < json_output.content.len(),
1239 "TOON ({}) should be smaller than JSON ({})",
1240 toon_output.content.len(),
1241 json_output.content.len()
1242 );
1243 }
1244
1245 #[test]
1246 fn test_mckp_routes_issues_through_inner_table() {
1247 let issues: Vec<Issue> = sample_issues().into_iter().take(10).collect();
1248
1249 let mckp_pipeline = Pipeline::with_config(PipelineConfig {
1250 format: OutputFormat::Mckp,
1251 max_chars: 1_000_000,
1252 ..Default::default()
1253 });
1254 let json_pipeline = Pipeline::with_config(PipelineConfig {
1255 format: OutputFormat::Json,
1256 max_chars: 1_000_000,
1257 ..Default::default()
1258 });
1259
1260 let mckp_out = mckp_pipeline.transform_issues(issues.clone()).unwrap();
1261 let json_out = json_pipeline.transform_issues(issues).unwrap();
1262
1263 assert!(
1266 mckp_out.content.len() < json_out.content.len(),
1267 "MCKP ({}) should be smaller than JSON ({})",
1268 mckp_out.content.len(),
1269 json_out.content.len(),
1270 );
1271 for k in ["key", "title", "state", "source"] {
1274 assert!(
1275 mckp_out.content.contains(k),
1276 "MCKP output is missing field `{k}`: {}",
1277 &mckp_out.content[..mckp_out.content.len().min(200)]
1278 );
1279 }
1280 }
1281
1282 #[test]
1283 fn test_mckp_falls_back_to_pretty_json_on_unstable_keys() {
1284 let issues: Vec<Issue> = sample_issues().into_iter().take(1).collect();
1288 let mckp_pipeline = Pipeline::with_config(PipelineConfig {
1289 format: OutputFormat::Mckp,
1290 max_chars: 1_000_000,
1291 ..Default::default()
1292 });
1293 let out = mckp_pipeline.transform_issues(issues).unwrap();
1294 assert!(out.content.contains("gh#1"));
1295 }
1296}