1use crate::cache::{CacheStore, SqliteCache};
2use crate::parser::Bone;
3use crate::parser::Parser;
4use anyhow::Result;
5use std::path::{Path, PathBuf};
6use std::sync::OnceLock;
7
// NOTE(review): the `#[allow(clippy::regex_creation_in_loops)]` attributes below sit on
// `static` items, which contain no loops; they look like leftovers — confirm whether they
// can be dropped.

/// Lazily compiled regex used by `Packer::pack` to collapse runs of blank lines.
#[allow(clippy::regex_creation_in_loops)]
static RE_EMPTY_LINES: OnceLock<regex::Regex> = OnceLock::new();
/// Lazily compiled regex used by `Packer::pack` to replace long base64-looking runs.
#[allow(clippy::regex_creation_in_loops)]
static RE_BASE64: OnceLock<regex::Regex> = OnceLock::new();
/// Lazily compiled regex used by `Packer::pack` to strip `//` and `#` line comments.
#[allow(clippy::regex_creation_in_loops)]
static RE_LINE_COMMENT: OnceLock<regex::Regex> = OnceLock::new();
/// Lazily compiled regex used by `Packer::pack` to strip `/* … */` and `<!-- … -->` blocks.
#[allow(clippy::regex_creation_in_loops)]
static RE_BLOCK_COMMENT: OnceLock<regex::Regex> = OnceLock::new();
18
/// Extension point that lets callers attach extra metadata to the bones of a packed file.
///
/// Plugins are stored as `Box<dyn ContextPlugin>` inside [`Packer`], hence the
/// `Send + Sync` bound.
pub trait ContextPlugin: Send + Sync {
    /// Short identifier for the plugin.
    fn name(&self) -> &str;

    /// Returns `true` when the plugin wants to contribute for the given path.
    ///
    /// NOTE(review): the parameter is named `directory`, but `Packer::pack` calls this
    /// with each individual file path — confirm which one implementors should expect.
    fn detect(&self, directory: &Path) -> bool;

    /// Adds or edits entries in `base_bones` for `file_path`.
    ///
    /// An `Err` returned here is propagated by `Packer::pack` and aborts the pack.
    fn enrich(&self, file_path: &Path, base_bones: &mut Vec<Bone>) -> Result<()>;
}
31
/// Serialization dialect produced by [`Packer::pack`].
///
/// The derives make the format cheap to pass around, printable in diagnostics and
/// comparable in tests; they do not change how existing callers construct or match it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OutputFormat {
    /// `<repository>`-rooted XML document.
    Xml,
    /// Markdown with `##` headings and fenced code blocks.
    Markdown,
}
37
/// Assembles a set of source files into a single XML or Markdown document.
pub struct Packer {
    /// Symbol cache; `pack` queries it via `list_files_with_symbols` to build the skeleton map.
    cache: SqliteCache,
    /// Parser handle. `pack` only touches it with `let _ = &self.parser;` — parsing itself
    /// goes through the free functions in `crate::parser`.
    parser: Parser,
    /// Plugins added through `register_plugin`; consulted for every file that is packed.
    plugins: Vec<Box<dyn ContextPlugin>>,
    /// Output dialect.
    format: OutputFormat,
    /// Optional token budget. Once output plus the next file's content would exceed it,
    /// file contents are omitted for that file and all following ones.
    max_tokens: Option<usize>,
    /// When `true`, the skeleton map section is not emitted.
    no_file_summary: bool,
    /// When `true`, `pack` returns right after the skeleton map, without per-file sections.
    no_files: bool,
    /// When `true`, block and line comments are stripped from file content.
    remove_comments: bool,
    /// When `true`, runs of blank lines in file content are collapsed.
    remove_empty_lines: bool,
    /// When `true`, long base64-looking runs are replaced with `[TRUNCATED_BASE64]`.
    truncate_base64: bool,
}
51
52impl Packer {
53 fn xml_escape(s: &str) -> String {
54 s.replace('&', "&")
55 .replace('<', "<")
56 .replace('>', ">")
57 .replace('"', """)
58 .replace('\'', "'")
59 }
60
61 fn xml_escape_cdata(s: &str) -> String {
62 s.replace("]]>", "]]]]><![CDATA[>")
64 }
65
66 #[allow(clippy::too_many_arguments)]
68 pub fn new(
69 cache: SqliteCache,
70 parser: Parser,
71 format: OutputFormat,
72 max_tokens: Option<usize>,
73 no_file_summary: bool,
74 no_files: bool,
75 remove_comments: bool,
76 remove_empty_lines: bool,
77 truncate_base64: bool,
78 ) -> Self {
79 Self {
80 cache,
81 parser,
82 plugins: Vec::new(),
83 format,
84 max_tokens,
85 no_file_summary,
86 no_files,
87 remove_comments,
88 remove_empty_lines,
89 truncate_base64,
90 }
91 }
92
93 pub fn register_plugin(&mut self, plugin: Box<dyn ContextPlugin>) {
95 self.plugins.push(plugin);
96 }
97
98 #[allow(clippy::regex_creation_in_loops)]
103 pub fn pack(&self, file_paths: &[PathBuf]) -> Result<String> {
104 let _ = &self.parser;
105
106 let mut output = String::new();
107
108 let db_files_symbols: Vec<(String, Vec<(String, String)>)> =
110 self.cache.list_files_with_symbols().unwrap_or_default();
111
112 match self.format {
113 OutputFormat::Xml => output.push_str("<repository>\n"),
114 OutputFormat::Markdown => {}
115 }
116
117 let lookup_symbols = |path: &PathBuf| -> Vec<(String, String)> {
119 let path_str = path.to_string_lossy().to_string();
120 let path_normalized = path_str.strip_prefix("./").unwrap_or(&path_str);
121 db_files_symbols
122 .iter()
123 .find(|(db_p, _)| {
124 path_normalized.ends_with(db_p.as_str()) || db_p.ends_with(path_normalized)
125 })
126 .map(|(_, syms)| syms.clone())
127 .unwrap_or_default()
128 };
129
130 if !self.no_file_summary {
132 match self.format {
133 OutputFormat::Xml => {
134 output.push_str(" <skeleton_map>\n");
135 for path in file_paths {
136 output.push_str(&format!(
137 " <file path=\"{}\">\n",
138 Self::xml_escape(&path.display().to_string())
139 ));
140 for (kind, name) in lookup_symbols(path) {
141 output.push_str(&format!(
142 " <signature>{} {}</signature>\n",
143 Self::xml_escape(&kind),
144 Self::xml_escape(&name)
145 ));
146 }
147 output.push_str(" </file>\n");
148 }
149 output.push_str(" </skeleton_map>\n");
150 }
151 OutputFormat::Markdown => {
152 output.push_str("## Skeleton Map\n\n");
153 for path in file_paths {
154 output.push_str(&format!("- {}\n", path.display()));
155 for (kind, name) in lookup_symbols(path) {
156 output.push_str(&format!(" - {} {}\n", kind, name));
157 }
158 }
159 output.push('\n');
160 }
161 }
162 }
163
164 if self.no_files {
165 if let OutputFormat::Xml = self.format {
166 output.push_str("</repository>\n");
167 }
168 return Ok(output);
169 }
170
171 let bpe = tiktoken_rs::cl100k_base()
172 .map_err(|e| anyhow::anyhow!("Failed to initialize tokenizer: {}", e))?;
173 let mut degrade_to_bones = false;
174
175 for path in file_paths {
176 let mut raw_content = match std::fs::read_to_string(path) {
177 Ok(s) => s,
178 Err(e) => {
179 eprintln!(
180 "Warning: skipping unreadable file {}: {}",
181 path.display(),
182 e
183 );
184 continue;
185 }
186 };
187
188 if self.remove_empty_lines {
189 raw_content = RE_EMPTY_LINES
190 .get_or_init(|| {
191 regex::Regex::new(r"\n\s*\n").expect("valid static regex: empty lines")
192 })
193 .replace_all(&raw_content, "\n")
194 .to_string();
195 }
196
197 if self.truncate_base64 {
198 raw_content = RE_BASE64
200 .get_or_init(|| {
201 regex::Regex::new(r"[A-Za-z0-9+/=]{100,}")
202 .expect("valid static regex: base64")
203 })
204 .replace_all(&raw_content, "[TRUNCATED_BASE64]")
205 .to_string();
206 }
207
208 let content = {
210 let ext = path.extension().unwrap_or_default().to_string_lossy();
211 if let Some(spec) = crate::parser::get_spec_for_extension(&ext) {
212 let doc = crate::parser::parse_file(&raw_content, &spec);
213 let mut result = String::new();
214 let mut last_end = 0;
215
216 let mut indices: Vec<usize> = (0..doc.symbols.len()).collect();
217 indices.sort_by_key(|&i| doc.symbols[i].full_range.start);
218
219 for i in &indices {
220 let sym = &doc.symbols[*i];
221 if let Some(body_range) = &sym.body_range {
222 if body_range.start >= last_end {
223 result.push_str(&raw_content[last_end..body_range.start]);
224 result.push_str("...");
225 last_end = body_range.end;
226 }
227 }
228 }
229 result.push_str(&raw_content[last_end..]);
230
231 if self.remove_comments {
232 result = RE_BLOCK_COMMENT
234 .get_or_init(|| {
235 regex::Regex::new(r"(?s)/\*.*?\*/|<!--.*?-->")
236 .expect("valid static regex: block comment")
237 })
238 .replace_all(&result, "")
239 .to_string();
240 result = RE_LINE_COMMENT
241 .get_or_init(|| {
242 regex::Regex::new(r"(?m)(//|#).*\n")
243 .expect("valid static regex: line comment")
244 })
245 .replace_all(&result, "\n")
246 .to_string();
247 }
248
249 result
250 } else {
251 if self.remove_comments {
252 let no_blocks = RE_BLOCK_COMMENT
253 .get_or_init(|| {
254 regex::Regex::new(r"(?s)/\*.*?\*/|<!--.*?-->")
255 .expect("valid static regex: block comment")
256 })
257 .replace_all(&raw_content, "")
258 .to_string();
259 RE_LINE_COMMENT
260 .get_or_init(|| {
261 regex::Regex::new(r"(?m)(//|#).*\n")
262 .expect("valid static regex: line comment")
263 })
264 .replace_all(&no_blocks, "\n")
265 .to_string()
266 } else {
267 raw_content.clone() }
269 }
270 };
271
272 let mut bones = vec![Bone::default()];
273
274 for plugin in &self.plugins {
275 if plugin.detect(path) {
276 plugin.enrich(path, &mut bones)?;
277 }
278 }
279
280 if !degrade_to_bones {
281 if let Some(max) = self.max_tokens {
282 let current_tokens = bpe.encode_with_special_tokens(&output).len();
283 let content_tokens = bpe.encode_with_special_tokens(&content).len();
284 if current_tokens + content_tokens > max {
285 degrade_to_bones = true;
286 }
287 }
288 }
289
290 match self.format {
291 OutputFormat::Xml => {
292 output.push_str(&format!(
293 " <file path=\"{}\">\n",
294 Self::xml_escape(&path.display().to_string())
295 ));
296 if !degrade_to_bones {
297 let safe_content = Self::xml_escape_cdata(&content);
298 if safe_content == content {
299 output.push_str(&format!(
300 " <content><![CDATA[\n{}\n]]></content>\n",
301 safe_content
302 ));
303 } else {
304 output.push_str(&format!(
307 " <content>{}</content>\n",
308 Self::xml_escape(&content)
309 ));
310 }
311 }
312 let has_metadata = bones.iter().any(|b| !b.metadata.is_empty());
314 if has_metadata {
315 output.push_str(" <bones>\n");
316 for bone in &bones {
317 for (k, v) in &bone.metadata {
318 output.push_str(&format!(
319 " <metadata key=\"{}\">{}</metadata>\n",
320 Self::xml_escape(k),
321 Self::xml_escape(v)
322 ));
323 }
324 }
325 output.push_str(" </bones>\n");
326 }
327 output.push_str(" </file>\n");
328 }
329 OutputFormat::Markdown => {
330 output.push_str(&format!("## {}\n\n", path.display()));
331 if !degrade_to_bones {
332 let max_backticks = {
335 let mut max = 0usize;
336 let mut cur = 0usize;
337 for c in content.chars() {
338 if c == '`' {
339 cur += 1;
340 max = max.max(cur);
341 } else {
342 cur = 0;
343 }
344 }
345 max
346 };
347 let fence_len = max_backticks.max(2) + 1;
348 let fence = "`".repeat(fence_len);
349 let safe_content = if max_backticks >= fence_len - 1 {
355 let threshold = fence_len - 1;
356 let mut result = String::with_capacity(content.len());
357 let mut run = 0usize;
358 for c in content.chars() {
359 result.push(c);
360 if c == '`' {
361 run += 1;
362 if run == threshold {
363 result.push('\u{200B}'); run = 0;
365 }
366 } else {
367 run = 0;
368 }
369 }
370 result
371 } else {
372 content.clone()
373 };
374 output.push_str(&format!("{}\n{}\n{}\n\n", fence, safe_content, fence));
375 }
376 let has_metadata = bones.iter().any(|b| !b.metadata.is_empty());
378 if has_metadata {
379 output.push_str("Bones:\n");
380 for bone in &bones {
381 for (k, v) in &bone.metadata {
382 output.push_str(&format!("- {}: {}\n", k, v));
383 }
384 }
385 output.push('\n');
386 }
387 }
388 }
389 }
390
391 if let OutputFormat::Xml = self.format {
392 output.push_str("</repository>\n");
393 }
394
395 Ok(output)
396 }
397}
398
#[cfg(test)]
mod tests {
    use super::*;

    /// Plugin stub that accepts every path and tags each bone with `injected = "true"`.
    struct MockPlugin;

    impl ContextPlugin for MockPlugin {
        fn name(&self) -> &str {
            "mock"
        }

        fn detect(&self, _directory: &Path) -> bool {
            true
        }

        fn enrich(&self, _file_path: &Path, base_bones: &mut Vec<Bone>) -> Result<()> {
            for bone in base_bones.iter_mut() {
                bone.metadata
                    .insert("injected".to_string(), "true".to_string());
            }
            Ok(())
        }
    }

    /// Boolean switches of `Packer::new`; `Flags::default()` leaves all of them off.
    #[derive(Clone, Copy, Default)]
    struct Flags {
        no_file_summary: bool,
        no_files: bool,
        remove_comments: bool,
        remove_empty_lines: bool,
        truncate_base64: bool,
    }

    /// Fresh in-memory cache for a single test.
    fn memory_cache() -> SqliteCache {
        SqliteCache::new_in_memory().expect("failed to create test cache")
    }

    /// Builds a packer from the pieces a test actually cares about.
    fn build_packer(
        cache: SqliteCache,
        format: OutputFormat,
        max_tokens: Option<usize>,
        flags: Flags,
    ) -> Packer {
        Packer::new(
            cache,
            Parser {},
            format,
            max_tokens,
            flags.no_file_summary,
            flags.no_files,
            flags.remove_comments,
            flags.remove_empty_lines,
            flags.truncate_base64,
        )
    }

    /// XML packer with an empty cache, no token budget and every flag off.
    fn xml_packer() -> Packer {
        build_packer(memory_cache(), OutputFormat::Xml, None, Flags::default())
    }

    /// XML packer with an empty cache and the given flags.
    fn xml_packer_with(flags: Flags) -> Packer {
        build_packer(memory_cache(), OutputFormat::Xml, None, flags)
    }

    /// XML packer with an empty cache and the given token budget.
    fn xml_packer_with_budget(max_tokens: usize) -> Packer {
        build_packer(
            memory_cache(),
            OutputFormat::Xml,
            Some(max_tokens),
            Flags::default(),
        )
    }

    /// Writes `content` to `filename` inside `dir` and returns the full path.
    fn make_temp_file(dir: &tempfile::TempDir, filename: &str, content: &str) -> PathBuf {
        let file_path = dir.path().join(filename);
        std::fs::write(&file_path, content).expect("failed to write file content");
        file_path
    }

    /// Creates a temp dir holding `sample.rs` with `content`; the dir must outlive the path.
    fn make_temp_rs_file(content: &str) -> (tempfile::TempDir, PathBuf) {
        let dir = tempfile::TempDir::new().expect("failed to create temp dir");
        let file_path = make_temp_file(&dir, "sample.rs", content);
        (dir, file_path)
    }

    #[test]
    fn test_plugin_detect_and_enrich() {
        let plugin = MockPlugin;
        assert!(plugin.detect(Path::new(".")));

        let mut bones = vec![Bone::default()];
        plugin
            .enrich(Path::new("any_file.rs"), &mut bones)
            .expect("enrich should succeed");

        assert_eq!(
            bones[0]
                .metadata
                .get("injected")
                .expect("injected key must be present"),
            "true"
        );
    }

    #[test]
    fn test_packer_xml_format() {
        let (_dir, file_path) = make_temp_rs_file("fn main() {}\n");
        let output = xml_packer()
            .pack(&[file_path])
            .expect("pack should succeed");
        assert!(output.contains("<repository>"));
    }

    #[test]
    fn test_packer_markdown_format() {
        let (_dir, file_path) = make_temp_rs_file("fn main() {}\n");
        let packer = build_packer(
            memory_cache(),
            OutputFormat::Markdown,
            None,
            Flags::default(),
        );
        let output = packer
            .pack(std::slice::from_ref(&file_path))
            .expect("pack should succeed");
        assert!(output.contains(&format!("## {}", file_path.display())));
    }

    #[test]
    fn test_packer_with_plugins() {
        let (_dir, file_path) = make_temp_rs_file("fn main() {}\n");
        let mut packer = xml_packer();
        packer.register_plugin(Box::new(MockPlugin));
        let output = packer.pack(&[file_path]).expect("pack should succeed");
        assert!(output.contains("injected"));
    }

    #[test]
    fn test_packer_empty_file_list() {
        assert!(xml_packer().pack(&[]).is_ok());
    }

    #[test]
    fn test_packer_missing_file() {
        let result = xml_packer().pack(&[PathBuf::from("this_file_does_not_exist_xyz.rs")]);
        assert!(result.is_ok());
    }

    #[test]
    fn test_packer_generates_skeleton_map_at_top() {
        let (_dir, file_path) = make_temp_rs_file("fn main() {}\n");
        let output = xml_packer()
            .pack(&[file_path])
            .expect("pack should succeed");

        // Only the element order matters here, not the amount of indentation.
        let after_root = output
            .strip_prefix("<repository>\n")
            .expect("output must open with <repository>");
        assert!(
            after_root.trim_start().starts_with("<skeleton_map>"),
            "skeleton map must be the first child of <repository>; got:\n{}",
            output
        );
    }

    #[test]
    fn test_packer_token_governor_degrades_to_bones() {
        let (_dir, file_path) = make_temp_rs_file("fn main() { let x = 1; }\n");
        let output = xml_packer_with_budget(10)
            .pack(&[file_path])
            .expect("pack should succeed");
        assert!(!output.contains("<content>"));
    }

    #[test]
    fn test_xml_signature_special_chars_are_escaped() {
        use crate::cache::CacheStore;

        let cache = memory_cache();
        cache.init().expect("failed to init cache schema");

        let file_id = cache
            .upsert_file("bad.rs", "h1", b"fn bad() {}")
            .expect("upsert_file should succeed");
        cache
            .insert_symbol(&crate::cache::Symbol {
                id: "s1".to_string(),
                file_id,
                name: "<script>&\"test\"</script>".to_string(),
                kind: "function".to_string(),
                byte_offset: 0,
                byte_length: 11,
            })
            .expect("symbol insert should succeed");

        let dir = tempfile::TempDir::new().expect("failed to create temp dir");
        let file_path = make_temp_file(&dir, "bad.rs", "fn bad() {}\n");

        let packer = build_packer(cache, OutputFormat::Xml, None, Flags::default());
        let output = packer.pack(&[file_path]).expect("pack should succeed");

        assert!(
            !output.contains("<script>"),
            "Bare <script> tag should not appear in XML output; expected escaped form"
        );
        assert!(
            output.contains("&lt;script&gt;") && output.contains("&amp;"),
            "XML special characters in symbol names must be escaped; got:\n{}",
            output
        );
    }

    #[test]
    fn test_xml_path_attribute_special_chars_are_escaped() {
        let dir = tempfile::TempDir::new().expect("failed to create temp dir");
        let file_path = make_temp_file(&dir, "a&b.txt", "hello world\n");

        let output = xml_packer()
            .pack(&[file_path])
            .expect("pack should succeed");

        assert!(
            !output.contains("a&b.txt\""),
            "Bare & in path attribute must be escaped as &amp;; got:\n{}",
            output
        );
        assert!(
            output.contains("a&amp;b.txt\""),
            "Escaped path attribute not found; got:\n{}",
            output
        );
    }

    #[test]
    fn test_xml_cdata_cdata_end_sequence_is_escaped() {
        let dir = tempfile::TempDir::new().expect("failed to create temp dir");
        let tricky = "let s = \"]]>\";\n";
        let file_path = make_temp_file(&dir, "tricky.txt", tricky);

        let output = xml_packer()
            .pack(&[file_path])
            .expect("pack should succeed");

        // Every `]]>` left in the output must be the one that closes a <content> CDATA section.
        for (idx, _) in output.match_indices("]]>") {
            assert!(
                output[idx + 3..].starts_with("</content>"),
                "Found ]]> at position {} that is not the CDATA closing sequence; \
                 raw content may break XML well-formedness",
                idx
            );
        }
    }

    #[test]
    fn test_xml_output_basic_well_formedness() {
        let (_dir, file_path) = make_temp_rs_file("fn main() {}\n");

        let output = xml_packer()
            .pack(&[file_path])
            .expect("pack should succeed");

        assert!(
            output.starts_with("<repository>"),
            "XML output must start with <repository>"
        );
        assert!(
            output.trim_end().ends_with("</repository>"),
            "XML output must end with </repository>"
        );

        // Outside CDATA, every `<` must open a tag, a closing tag, a declaration or a PI.
        let cdata_re =
            regex::Regex::new(r"(?s)<!\[CDATA\[.*?]]>").expect("failed to compile cdata regex");
        let stripped = cdata_re.replace_all(&output, "");

        for (i, ch) in stripped.char_indices() {
            if ch == '<' {
                let next = stripped[i + 1..].chars().next();
                assert!(
                    matches!(next, Some('/' | '!' | '?' | 'a'..='z' | 'A'..='Z')),
                    "Bare < found at position {} outside of CDATA: ...{}...",
                    i,
                    &stripped[i.saturating_sub(10)..std::cmp::min(i + 20, stripped.len())]
                );
            }
        }
    }

    #[test]
    fn test_markdown_skeleton_map_indentation() {
        use crate::cache::CacheStore;

        let cache = memory_cache();
        cache.init().expect("failed to init cache schema");

        let dir = tempfile::TempDir::new().expect("failed to create temp dir");
        let file_path = make_temp_file(&dir, "lib.rs", "fn alpha() {}\n");

        let file_id = cache
            .upsert_file(file_path.to_string_lossy().as_ref(), "h2", b"fn alpha() {}")
            .expect("upsert_file should succeed");
        cache
            .insert_symbol(&crate::cache::Symbol {
                id: "s_alpha".to_string(),
                file_id,
                name: "alpha".to_string(),
                kind: "function".to_string(),
                byte_offset: 0,
                byte_length: 13,
            })
            .expect("symbol insert should succeed");

        let packer = build_packer(
            cache,
            OutputFormat::Markdown,
            None,
            Flags {
                no_files: true,
                ..Flags::default()
            },
        );
        let output = packer.pack(&[file_path]).expect("pack should succeed");

        assert!(
            output.lines().any(|line| line.starts_with("- ")),
            "File bullet not found in Markdown output"
        );
        assert!(
            output.lines().any(|line| line == "  - function alpha"),
            "Symbol entries in skeleton map must be indented with two spaces; got:\n{}",
            output
        );
    }

    #[test]
    fn test_markdown_symbol_names_with_special_chars() {
        use crate::cache::CacheStore;

        let cache = memory_cache();
        cache.init().expect("failed to init cache schema");

        let dir = tempfile::TempDir::new().expect("failed to create temp dir");
        let file_path = make_temp_file(&dir, "weird.rs", "fn weird() {}\n");

        let file_id = cache
            .upsert_file(file_path.to_string_lossy().as_ref(), "h3", b"fn weird() {}")
            .expect("upsert_file should succeed");
        cache
            .insert_symbol(&crate::cache::Symbol {
                id: "s_weird".to_string(),
                file_id,
                name: "*_[weird`_]*".to_string(),
                kind: "function".to_string(),
                byte_offset: 0,
                byte_length: 13,
            })
            .expect("symbol insert should succeed");

        let packer = build_packer(
            cache,
            OutputFormat::Markdown,
            None,
            Flags {
                no_files: true,
                ..Flags::default()
            },
        );
        let output = packer.pack(&[file_path]).expect("pack should succeed");

        assert!(output.contains("- "), "File bullet disappeared");
        assert!(
            output.contains("*_[weird`_]*"),
            "Symbol name with Markdown special chars should appear verbatim"
        );
    }

    #[test]
    fn test_token_governor_generous_budget_includes_content() {
        let (_dir, file_path) = make_temp_rs_file("fn main() { let x = 42; }\n");

        let output = xml_packer_with_budget(100_000)
            .pack(&[file_path])
            .expect("pack should succeed");

        assert!(
            output.contains("<content><![CDATA["),
            "Expected <content> block when budget is generous; got:\n{}",
            output
        );
    }

    #[test]
    fn test_token_governor_one_token_budget_omits_content() {
        let (_dir, file_path) = make_temp_rs_file("fn main() { let x = 42; }\n");

        let output = xml_packer_with_budget(1)
            .pack(&[file_path])
            .expect("pack() must not error under tight budget");

        assert!(
            !output.contains("<content>"),
            "No <content> block expected when budget is 1 token"
        );
    }

    #[test]
    fn test_token_governor_graceful_degradation_no_panic() {
        let (_dir, file_path) =
            make_temp_rs_file("fn a() { 1 }\nfn b() { 2 }\nfn c() { 3 }\nfn d() { 4 }\n");

        for budget in [0usize, 1, 5, 50] {
            let result = xml_packer_with_budget(budget).pack(std::slice::from_ref(&file_path));
            assert!(
                result.is_ok(),
                "pack() panicked or errored at max_tokens={}",
                budget
            );
        }
    }

    #[test]
    fn test_no_files_and_no_file_summary_together() {
        let (_dir, file_path) = make_temp_rs_file("fn main() {}\n");

        let packer = xml_packer_with(Flags {
            no_file_summary: true,
            no_files: true,
            ..Flags::default()
        });
        let output = packer.pack(&[file_path]).expect("pack should succeed");

        let trimmed = output.trim();
        assert_eq!(
            trimmed, "<repository>\n</repository>",
            "With both no_files and no_file_summary, output should be just the repository tags; got:\n{}",
            trimmed
        );
    }

    #[test]
    fn test_remove_line_comments_from_rust() {
        let dir = tempfile::TempDir::new().expect("failed to create temp dir");
        let file_path = make_temp_file(
            &dir,
            "comments.txt",
            "let x = 1; // this is a comment\nlet y = 2;\n",
        );

        let packer = xml_packer_with(Flags {
            remove_comments: true,
            ..Flags::default()
        });
        let output = packer.pack(&[file_path]).expect("pack should succeed");

        assert!(
            !output.contains("// this is a comment"),
            "Line comment should be stripped; got:\n{}",
            output
        );
        assert!(
            output.contains("let x = 1;"),
            "Non-comment code should remain after stripping line comments"
        );
    }

    #[test]
    fn test_remove_block_comments() {
        let dir = tempfile::TempDir::new().expect("failed to create temp dir");
        let file_path = make_temp_file(
            &dir,
            "block_comments.txt",
            "int x = /* inline block */ 42;\n/* multi\nline\ncomment */\nint y = 1;\n",
        );

        let packer = xml_packer_with(Flags {
            remove_comments: true,
            ..Flags::default()
        });
        let output = packer.pack(&[file_path]).expect("pack should succeed");

        assert!(
            !output.contains("inline block"),
            "Inline block comment should be stripped"
        );
        assert!(
            !output.contains("multi\nline\ncomment"),
            "Multi-line block comment should be stripped"
        );
        assert!(
            output.contains("int x ="),
            "Code outside block comment should be preserved"
        );
    }

    #[test]
    fn test_remove_empty_lines_collapses_blanks() {
        let dir = tempfile::TempDir::new().expect("failed to create temp dir");
        let file_path = make_temp_file(
            &dir,
            "blanks.txt",
            "line one\n\n\n\nline two\n\n\nline three\n",
        );

        let packer = xml_packer_with(Flags {
            remove_empty_lines: true,
            ..Flags::default()
        });
        let output = packer.pack(&[file_path]).expect("pack should succeed");

        assert!(
            !output.contains("\n\n\n"),
            "Multiple consecutive blank lines should be collapsed to a single newline; got:\n{}",
            output
        );
        assert!(
            output.contains("line one"),
            "Non-blank lines must be preserved"
        );
        assert!(
            output.contains("line two"),
            "Non-blank lines must be preserved"
        );
    }

    #[test]
    fn test_truncate_base64_replaces_long_strings() {
        let dir = tempfile::TempDir::new().expect("failed to create temp dir");
        // Exactly at the 100-character threshold.
        let long_token = "A".repeat(100);
        let content = format!("key = {}\n", long_token);
        let file_path = make_temp_file(&dir, "tokens.txt", &content);

        let packer = xml_packer_with(Flags {
            truncate_base64: true,
            ..Flags::default()
        });
        let output = packer.pack(&[file_path]).expect("pack should succeed");

        assert!(
            output.contains("[TRUNCATED_BASE64]"),
            "A 100-char alphanumeric string should be replaced with [TRUNCATED_BASE64]"
        );
        assert!(
            !output.contains(&long_token),
            "The original long token must not appear in output after truncation"
        );
    }

    #[test]
    fn test_truncate_base64_preserves_short_strings() {
        let dir = tempfile::TempDir::new().expect("failed to create temp dir");
        // One character below the threshold.
        let short_token = "B".repeat(99);
        let content = format!("key = {}\n", short_token);
        let file_path = make_temp_file(&dir, "short_tokens.txt", &content);

        let packer = xml_packer_with(Flags {
            truncate_base64: true,
            ..Flags::default()
        });
        let output = packer.pack(&[file_path]).expect("pack should succeed");

        assert!(
            output.contains(&short_token),
            "A 99-char string must NOT be truncated"
        );
        assert!(
            !output.contains("[TRUNCATED_BASE64]"),
            "No truncation should occur for strings under 100 chars"
        );
    }

    #[test]
    fn test_three_files_all_appear_in_skeleton_map() {
        let dir = tempfile::TempDir::new().expect("failed to create temp dir");
        let f1 = make_temp_file(&dir, "one.txt", "content one\n");
        let f2 = make_temp_file(&dir, "two.txt", "content two\n");
        let f3 = make_temp_file(&dir, "three.txt", "content three\n");

        let output = xml_packer()
            .pack(&[f1, f2, f3])
            .expect("pack should succeed");

        for name in ["one.txt", "two.txt", "three.txt"] {
            assert!(output.contains(name), "{} missing from output", name);
        }
    }

    #[test]
    fn test_skeleton_map_preserves_input_order() {
        let dir = tempfile::TempDir::new().expect("failed to create temp dir");
        let f1 = make_temp_file(&dir, "alpha.txt", "alpha\n");
        let f2 = make_temp_file(&dir, "beta.txt", "beta\n");
        let f3 = make_temp_file(&dir, "gamma.txt", "gamma\n");

        let output = xml_packer()
            .pack(&[f1, f2, f3])
            .expect("pack should succeed");

        let pos_alpha = output.find("alpha.txt").expect("alpha.txt not found");
        let pos_beta = output.find("beta.txt").expect("beta.txt not found");
        let pos_gamma = output.find("gamma.txt").expect("gamma.txt not found");

        assert!(
            pos_alpha < pos_beta && pos_beta < pos_gamma,
            "Files must appear in the skeleton map in the order they were supplied"
        );
    }

    #[test]
    fn test_deleted_file_is_gracefully_skipped() {
        let dir = tempfile::TempDir::new().expect("failed to create temp dir");
        let file_path = make_temp_file(&dir, "ephemeral.txt", "will be deleted\n");

        std::fs::remove_file(&file_path).expect("failed to delete ephemeral file");

        let output = xml_packer()
            .pack(&[file_path])
            .expect("pack() must not return Err when a file has been deleted");

        assert!(
            output.starts_with("<repository>"),
            "Output must start with <repository>"
        );
        assert!(
            output.trim_end().ends_with("</repository>"),
            "Output must end with </repository>"
        );
        assert!(
            !output.contains("will be deleted"),
            "Content of deleted file must not appear in output"
        );
    }

    #[test]
    fn test_plugin_metadata_xml_escaping() {
        /// Plugin whose metadata key and value both try to break out of the XML structure.
        struct XmlDangerousPlugin;

        impl ContextPlugin for XmlDangerousPlugin {
            fn name(&self) -> &str {
                "xml_dangerous"
            }

            fn detect(&self, _directory: &Path) -> bool {
                true
            }

            fn enrich(&self, _file_path: &Path, base_bones: &mut Vec<Bone>) -> Result<()> {
                for bone in base_bones.iter_mut() {
                    bone.metadata.insert(
                        "key<with>&\"special".to_string(),
                        "</metadata><malicious>payload</malicious><metadata key=\"x\">".to_string(),
                    );
                }
                Ok(())
            }
        }

        let (_dir, file_path) = make_temp_rs_file("fn main() {}\n");
        let mut packer = xml_packer();
        packer.register_plugin(Box::new(XmlDangerousPlugin));

        let output = packer.pack(&[file_path]).expect("pack should succeed");

        assert!(
            !output.contains("<malicious>"),
            "Bare <malicious> tag found in output — metadata value was not XML-escaped; got:\n{}",
            output
        );
        assert!(
            !output.contains("</malicious>"),
            "Bare </malicious> tag found in output — metadata value was not XML-escaped; got:\n{}",
            output
        );
        assert!(
            output.contains("&lt;malicious&gt;"),
            "Expected the metadata value to appear in entity-escaped form; got:\n{}",
            output
        );
        assert!(
            output.contains("key&lt;with&gt;&amp;&quot;special"),
            "Expected the metadata key attribute to appear in entity-escaped form; got:\n{}",
            output
        );
        assert!(
            output.contains("</repository>"),
            "Output must still contain </repository> after metadata injection; got:\n{}",
            output
        );
    }
}