1use crate::cache::{CacheStore, SqliteCache};
2use crate::parser::Bone;
3use crate::parser::Parser;
4use anyhow::Result;
5use std::path::{Path, PathBuf};
6use std::sync::OnceLock;
7
// Lazily compiled regexes used by `Packer::pack`. Each cell starts empty and is
// filled on first use through `OnceLock::get_or_init`, so a pattern is compiled
// at most once per process.

/// Regex used when `remove_empty_lines` is enabled to collapse blank-line runs.
#[allow(clippy::regex_creation_in_loops)]
static RE_EMPTY_LINES: OnceLock<regex::Regex> = OnceLock::new();
/// Regex used when `truncate_base64` is enabled to find long base64-looking runs.
#[allow(clippy::regex_creation_in_loops)]
static RE_BASE64: OnceLock<regex::Regex> = OnceLock::new();
/// Regex used when `remove_comments` is enabled to find `//` and `#` line comments.
#[allow(clippy::regex_creation_in_loops)]
static RE_LINE_COMMENT: OnceLock<regex::Regex> = OnceLock::new();
/// Regex used when `remove_comments` is enabled to find `/* … */` and `<!-- … -->` blocks.
#[allow(clippy::regex_creation_in_loops)]
static RE_BLOCK_COMMENT: OnceLock<regex::Regex> = OnceLock::new();
18
/// Extension point that lets an integration attach extra metadata to packed files.
///
/// `Packer::pack` keeps only the registered plugins whose `detect` returns `true`
/// for the packer's workspace root, then calls `enrich` on each of them once per
/// readable input file.
pub trait ContextPlugin: Send + Sync {
    /// Returns the plugin's name.
    fn name(&self) -> &str;

    /// Reports whether this plugin applies to `directory`.
    fn detect(&self, directory: &Path) -> bool;

    /// Adds plugin-specific data to `base_bones` for the file at `file_path`.
    ///
    /// # Errors
    /// Any error returned here is propagated by `Packer::pack`, aborting the pack.
    fn enrich(&self, file_path: &Path, base_bones: &mut Vec<Bone>) -> Result<()>;
}
31
/// Serialisation format produced by the packer.
///
/// The type is a plain tag, so it derives the usual value-type traits to make it
/// printable, copyable, and comparable.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OutputFormat {
    /// Emit an XML document wrapped in a `<repository>` element.
    Xml,
    /// Emit Markdown with `##` headings and fenced code blocks.
    Markdown,
}
37
38impl OutputFormat {
39 pub fn parse(format: &str) -> Result<Self> {
40 match format.to_lowercase().as_str() {
41 "xml" => Ok(Self::Xml),
42 "markdown" => Ok(Self::Markdown),
43 other => anyhow::bail!("Invalid output format: {other}. Expected 'xml' or 'markdown'"),
44 }
45 }
46}
47
/// Renders a set of files into one XML or Markdown document.
pub struct Packer {
    /// Symbol cache queried for the skeleton map; paths are looked up relative to
    /// `workspace_root`.
    cache: SqliteCache,
    /// Parser handle. `pack` only takes a reference to it and does not otherwise use it.
    parser: Parser,
    /// Directory passed to every plugin's `detect` and stripped from file paths
    /// before cache lookups.
    workspace_root: PathBuf,
    /// Plugins added through `register_plugin`, in registration order.
    plugins: Vec<Box<dyn ContextPlugin>>,
    /// Output syntax to emit.
    format: OutputFormat,
    /// Optional token budget; once the output would exceed it, file contents are omitted.
    max_tokens: Option<usize>,
    /// When `true`, the skeleton map section is not emitted.
    no_file_summary: bool,
    /// When `true`, the per-file sections are not emitted.
    no_files: bool,
    /// When `true`, block and line comments are stripped from file contents.
    remove_comments: bool,
    /// When `true`, runs of blank lines are collapsed before packing.
    remove_empty_lines: bool,
    /// When `true`, long base64-looking runs are replaced with `[TRUNCATED_BASE64]`.
    truncate_base64: bool,
}
62
63impl Packer {
64 fn xml_escape(s: &str) -> String {
65 s.replace('&', "&")
66 .replace('<', "<")
67 .replace('>', ">")
68 .replace('"', """)
69 .replace('\'', "'")
70 }
71
72 fn xml_escape_cdata(s: &str) -> String {
73 s.replace("]]>", "]]]]><![CDATA[>")
75 }
76
77 #[allow(clippy::too_many_arguments)]
79 pub fn new(
80 cache: SqliteCache,
81 parser: Parser,
82 format: OutputFormat,
83 max_tokens: Option<usize>,
84 no_file_summary: bool,
85 no_files: bool,
86 remove_comments: bool,
87 remove_empty_lines: bool,
88 truncate_base64: bool,
89 ) -> Self {
90 Self::with_workspace_root(
91 cache,
92 parser,
93 PathBuf::from("."),
94 format,
95 max_tokens,
96 no_file_summary,
97 no_files,
98 remove_comments,
99 remove_empty_lines,
100 truncate_base64,
101 )
102 }
103
104 #[allow(clippy::too_many_arguments)]
105 pub fn with_workspace_root(
106 cache: SqliteCache,
107 parser: Parser,
108 workspace_root: PathBuf,
109 format: OutputFormat,
110 max_tokens: Option<usize>,
111 no_file_summary: bool,
112 no_files: bool,
113 remove_comments: bool,
114 remove_empty_lines: bool,
115 truncate_base64: bool,
116 ) -> Self {
117 let _ = cache.init();
118 Self {
119 cache,
120 parser,
121 workspace_root,
122 plugins: Vec::new(),
123 format,
124 max_tokens,
125 no_file_summary,
126 no_files,
127 remove_comments,
128 remove_empty_lines,
129 truncate_base64,
130 }
131 }
132
133 pub fn register_plugin(&mut self, plugin: Box<dyn ContextPlugin>) {
135 self.plugins.push(plugin);
136 }
137
138 #[allow(clippy::regex_creation_in_loops)]
143 pub fn pack(&self, file_paths: &[PathBuf]) -> Result<String> {
144 let _ = &self.parser;
145
146 let mut output = String::new();
147 let active_plugins: Vec<&dyn ContextPlugin> = self
148 .plugins
149 .iter()
150 .filter(|plugin| plugin.detect(&self.workspace_root))
151 .map(|plugin| plugin.as_ref())
152 .collect();
153
154 match self.format {
155 OutputFormat::Xml => output.push_str("<repository>\n"),
156 OutputFormat::Markdown => {}
157 }
158
159 let lookup_symbols = |path: &PathBuf| -> Result<Vec<(String, String)>> {
160 let relative_path = path
161 .strip_prefix(&self.workspace_root)
162 .unwrap_or(path)
163 .to_string_lossy()
164 .to_string();
165 self.cache
166 .get_file_symbols(&relative_path)
167 .map_err(Into::into)
168 };
169
170 if !self.no_file_summary {
172 match self.format {
173 OutputFormat::Xml => {
174 output.push_str(" <skeleton_map>\n");
175 for path in file_paths {
176 output.push_str(&format!(
177 " <file path=\"{}\">\n",
178 Self::xml_escape(&path.display().to_string())
179 ));
180 for (kind, name) in lookup_symbols(path)? {
181 output.push_str(&format!(
182 " <signature>{} {}</signature>\n",
183 Self::xml_escape(&kind),
184 Self::xml_escape(&name)
185 ));
186 }
187 output.push_str(" </file>\n");
188 }
189 output.push_str(" </skeleton_map>\n");
190 }
191 OutputFormat::Markdown => {
192 output.push_str("## Skeleton Map\n\n");
193 for path in file_paths {
194 output.push_str(&format!("- {}\n", path.display()));
195 for (kind, name) in lookup_symbols(path)? {
196 output.push_str(&format!(" - {} {}\n", kind, name));
197 }
198 }
199 output.push('\n');
200 }
201 }
202 }
203
204 if self.no_files {
205 if let OutputFormat::Xml = self.format {
206 output.push_str("</repository>\n");
207 }
208 return Ok(output);
209 }
210
211 let bpe = tiktoken_rs::cl100k_base()
212 .map_err(|e| anyhow::anyhow!("Failed to initialize tokenizer: {}", e))?;
213 let mut degrade_to_bones = false;
214
215 for path in file_paths {
216 let mut raw_content = match std::fs::read_to_string(path) {
217 Ok(s) => s,
218 Err(e) => {
219 eprintln!(
220 "Warning: skipping unreadable file {}: {}",
221 path.display(),
222 e
223 );
224 continue;
225 }
226 };
227
228 if self.remove_empty_lines {
229 raw_content = RE_EMPTY_LINES
230 .get_or_init(|| {
231 regex::Regex::new(r"\n\s*\n").expect("valid static regex: empty lines")
232 })
233 .replace_all(&raw_content, "\n")
234 .to_string();
235 }
236
237 if self.truncate_base64 {
238 raw_content = RE_BASE64
240 .get_or_init(|| {
241 regex::Regex::new(r"[A-Za-z0-9+/=]{100,}")
242 .expect("valid static regex: base64")
243 })
244 .replace_all(&raw_content, "[TRUNCATED_BASE64]")
245 .to_string();
246 }
247
248 let content = {
250 let ext = path.extension().unwrap_or_default().to_string_lossy();
251 if let Some(spec) = crate::parser::get_spec_for_extension(&ext) {
252 let doc = crate::parser::parse_file(&raw_content, &spec);
253 let mut result = String::new();
254 let mut last_end = 0;
255
256 let mut indices: Vec<usize> = (0..doc.symbols.len()).collect();
257 indices.sort_by_key(|&i| doc.symbols[i].full_range.start);
258
259 for i in &indices {
260 let sym = &doc.symbols[*i];
261 if let Some(body_range) = &sym.body_range {
262 if body_range.start >= last_end {
263 result.push_str(&raw_content[last_end..body_range.start]);
264 result.push_str("...");
265 last_end = body_range.end;
266 }
267 }
268 }
269 result.push_str(&raw_content[last_end..]);
270
271 if self.remove_comments {
272 result = RE_BLOCK_COMMENT
274 .get_or_init(|| {
275 regex::Regex::new(r"(?s)/\*.*?\*/|<!--.*?-->")
276 .expect("valid static regex: block comment")
277 })
278 .replace_all(&result, "")
279 .to_string();
280 result = RE_LINE_COMMENT
281 .get_or_init(|| {
282 regex::Regex::new(r"(?m)(//|#).*\n")
283 .expect("valid static regex: line comment")
284 })
285 .replace_all(&result, "\n")
286 .to_string();
287 }
288
289 result
290 } else {
291 if self.remove_comments {
292 let no_blocks = RE_BLOCK_COMMENT
293 .get_or_init(|| {
294 regex::Regex::new(r"(?s)/\*.*?\*/|<!--.*?-->")
295 .expect("valid static regex: block comment")
296 })
297 .replace_all(&raw_content, "")
298 .to_string();
299 RE_LINE_COMMENT
300 .get_or_init(|| {
301 regex::Regex::new(r"(?m)(//|#).*\n")
302 .expect("valid static regex: line comment")
303 })
304 .replace_all(&no_blocks, "\n")
305 .to_string()
306 } else {
307 raw_content.clone() }
309 }
310 };
311
312 let mut bones = vec![Bone::default()];
313
314 for plugin in &active_plugins {
315 plugin.enrich(path, &mut bones)?;
316 }
317
318 if !degrade_to_bones {
319 if let Some(max) = self.max_tokens {
320 let current_tokens = bpe.encode_with_special_tokens(&output).len();
321 let content_tokens = bpe.encode_with_special_tokens(&content).len();
322 if current_tokens + content_tokens > max {
323 degrade_to_bones = true;
324 }
325 }
326 }
327
328 match self.format {
329 OutputFormat::Xml => {
330 output.push_str(&format!(
331 " <file path=\"{}\">\n",
332 Self::xml_escape(&path.display().to_string())
333 ));
334 if !degrade_to_bones {
335 let safe_content = Self::xml_escape_cdata(&content);
336 if safe_content == content {
337 output.push_str(&format!(
338 " <content><![CDATA[\n{}\n]]></content>\n",
339 safe_content
340 ));
341 } else {
342 output.push_str(&format!(
345 " <content>{}</content>\n",
346 Self::xml_escape(&content)
347 ));
348 }
349 }
350 let has_metadata = bones.iter().any(|b| !b.metadata.is_empty());
352 if has_metadata {
353 output.push_str(" <bones>\n");
354 for bone in &bones {
355 for (k, v) in &bone.metadata {
356 output.push_str(&format!(
357 " <metadata key=\"{}\">{}</metadata>\n",
358 Self::xml_escape(k),
359 Self::xml_escape(v)
360 ));
361 }
362 }
363 output.push_str(" </bones>\n");
364 }
365 output.push_str(" </file>\n");
366 }
367 OutputFormat::Markdown => {
368 output.push_str(&format!("## {}\n\n", path.display()));
369 if !degrade_to_bones {
370 let max_backticks = {
373 let mut max = 0usize;
374 let mut cur = 0usize;
375 for c in content.chars() {
376 if c == '`' {
377 cur += 1;
378 max = max.max(cur);
379 } else {
380 cur = 0;
381 }
382 }
383 max
384 };
385 let fence_len = max_backticks.max(2) + 1;
386 let fence = "`".repeat(fence_len);
387 let safe_content = if max_backticks >= fence_len - 1 {
393 let threshold = fence_len - 1;
394 let mut result = String::with_capacity(content.len());
395 let mut run = 0usize;
396 for c in content.chars() {
397 result.push(c);
398 if c == '`' {
399 run += 1;
400 if run == threshold {
401 result.push('\u{200B}'); run = 0;
403 }
404 } else {
405 run = 0;
406 }
407 }
408 result
409 } else {
410 content.clone()
411 };
412 output.push_str(&format!("{}\n{}\n{}\n\n", fence, safe_content, fence));
413 }
414 let has_metadata = bones.iter().any(|b| !b.metadata.is_empty());
416 if has_metadata {
417 output.push_str("Bones:\n");
418 for bone in &bones {
419 for (k, v) in &bone.metadata {
420 output.push_str(&format!("- {}: {}\n", k, v));
421 }
422 }
423 output.push('\n');
424 }
425 }
426 }
427 }
428
429 if let OutputFormat::Xml = self.format {
430 output.push_str("</repository>\n");
431 }
432
433 Ok(output)
434 }
435}
436
437#[cfg(test)]
438mod tests {
439 use super::*;
440 use std::io::Write;
441
442 struct MockPlugin;
443
444 impl ContextPlugin for MockPlugin {
445 fn name(&self) -> &str {
446 "mock"
447 }
448
449 fn detect(&self, _directory: &Path) -> bool {
450 true
451 }
452
453 fn enrich(&self, _file_path: &Path, base_bones: &mut Vec<Bone>) -> Result<()> {
454 for bone in base_bones.iter_mut() {
455 bone.metadata
456 .insert("injected".to_string(), "true".to_string());
457 }
458 Ok(())
459 }
460 }
461
462 fn make_temp_rs_file(content: &str) -> (tempfile::TempDir, PathBuf) {
463 let dir = tempfile::TempDir::new().expect("failed to create temp dir");
464 let file_path = dir.path().join("sample.rs");
465 let mut f = std::fs::File::create(&file_path).expect("failed to create temp file");
466 f.write_all(content.as_bytes())
467 .expect("failed to write file content");
468 (dir, file_path)
469 }
470
471 #[test]
472 fn test_plugin_detect_and_enrich() {
473 let plugin = MockPlugin;
474 assert!(plugin.detect(Path::new(".")));
475 let mut bones = vec![Bone::default()];
476 plugin
477 .enrich(Path::new("any_file.rs"), &mut bones)
478 .expect("enrich should succeed");
479 assert_eq!(
480 bones[0]
481 .metadata
482 .get("injected")
483 .expect("injected key must be present"),
484 "true"
485 );
486 }
487
488 #[test]
489 fn test_packer_xml_format() {
490 let (_dir, file_path) = make_temp_rs_file("fn main() {}\n");
491 let packer = Packer::new(
492 SqliteCache::new_in_memory().expect("failed to create test cache"),
493 Parser {},
494 OutputFormat::Xml,
495 None,
496 false,
497 false,
498 false,
499 false,
500 false,
501 );
502 let result = packer.pack(&[file_path]);
503 assert!(result.is_ok());
504 let output = result.expect("pack should succeed");
505 assert!(output.contains("<repository>"));
506 }
507
508 #[test]
509 fn test_packer_markdown_format() {
510 let (_dir, file_path) = make_temp_rs_file("fn main() {}\n");
511 let packer = Packer::new(
512 SqliteCache::new_in_memory().expect("failed to create test cache"),
513 Parser {},
514 OutputFormat::Markdown,
515 None,
516 false,
517 false,
518 false,
519 false,
520 false,
521 );
522 let result = packer.pack(std::slice::from_ref(&file_path));
523 assert!(result.is_ok());
524 let output = result.expect("pack should succeed");
525 assert!(output.contains(&format!("## {}", file_path.display())));
526 }
527
528 #[test]
529 fn test_packer_with_plugins() {
530 let (_dir, file_path) = make_temp_rs_file("fn main() {}\n");
531 let mut packer = Packer::new(
532 SqliteCache::new_in_memory().expect("failed to create test cache"),
533 Parser {},
534 OutputFormat::Xml,
535 None,
536 false,
537 false,
538 false,
539 false,
540 false,
541 );
542 packer.register_plugin(Box::new(MockPlugin));
543 let result = packer.pack(&[file_path]);
544 assert!(result.is_ok());
545 let output = result.expect("pack should succeed");
546 assert!(output.contains("injected"));
547 }
548
549 #[test]
550 fn test_packer_empty_file_list() {
551 let packer = Packer::new(
552 SqliteCache::new_in_memory().expect("failed to create test cache"),
553 Parser {},
554 OutputFormat::Xml,
555 None,
556 false,
557 false,
558 false,
559 false,
560 false,
561 );
562 let result = packer.pack(&[]);
563 assert!(result.is_ok());
564 }
565
566 #[test]
567 fn test_packer_missing_file() {
568 let packer = Packer::new(
569 SqliteCache::new_in_memory().expect("failed to create test cache"),
570 Parser {},
571 OutputFormat::Xml,
572 None,
573 false,
574 false,
575 false,
576 false,
577 false,
578 );
579 let result = packer.pack(&[PathBuf::from("this_file_does_not_exist_xyz.rs")]);
580 assert!(result.is_ok());
582 }
583
584 #[test]
585 fn test_packer_generates_skeleton_map_at_top() {
586 let (_dir, file_path) = make_temp_rs_file("fn main() {}\n");
587 let packer = Packer::new(
588 SqliteCache::new_in_memory().expect("failed to create test cache"),
589 Parser {},
590 OutputFormat::Xml,
591 None,
592 false,
593 false,
594 false,
595 false,
596 false,
597 );
598 let result = packer.pack(&[file_path]);
599 assert!(result.is_ok());
600 let output = result.expect("pack should succeed");
601 assert!(output.starts_with("<repository>\n <skeleton_map>"));
603 }
604
605 #[test]
606 fn test_packer_token_governor_degrades_to_bones() {
607 let (_dir, file_path) = make_temp_rs_file("fn main() { let x = 1; }\n");
609 let packer = Packer::new(
610 SqliteCache::new_in_memory().expect("failed to create test cache"),
611 Parser {},
612 OutputFormat::Xml,
613 Some(10),
614 false,
615 false,
616 false,
617 false,
618 false,
619 );
620 let result = packer.pack(&[file_path]);
621 assert!(result.is_ok());
622 let output = result.expect("pack should succeed");
623 assert!(!output.contains("<content>"));
625 }
626
627 fn make_temp_file(dir: &tempfile::TempDir, filename: &str, content: &str) -> PathBuf {
631 let file_path = dir.path().join(filename);
632 if let Some(parent) = file_path.parent() {
633 std::fs::create_dir_all(parent).expect("failed to create parent directories");
634 }
635 let mut f = std::fs::File::create(&file_path).expect("failed to create temp file");
636 f.write_all(content.as_bytes())
637 .expect("failed to write file content");
638 file_path
639 }
640
641 #[test]
650 fn test_xml_signature_special_chars_are_escaped() {
651 use crate::cache::CacheStore;
652
653 let cache = SqliteCache::new_in_memory().expect("failed to create test cache");
654 cache.init().expect("failed to init cache schema");
655
656 let file_id = cache
658 .upsert_file("bad.rs", "h1", b"fn bad() {}")
659 .expect("upsert_file should succeed");
660 cache
661 .insert_symbol(&crate::cache::Symbol {
662 id: "s1".to_string(),
663 file_id,
664 name: "<script>&\"test\"</script>".to_string(),
665 kind: "function".to_string(),
666 byte_offset: 0,
667 byte_length: 11,
668 })
669 .expect("symbol insert should succeed");
670
671 let dir = tempfile::TempDir::new().expect("failed to create temp dir");
672 let file_path = make_temp_file(&dir, "bad.rs", "fn bad() {}\n");
673
674 let packer = Packer::with_workspace_root(
675 cache,
676 Parser {},
677 dir.path().to_path_buf(),
678 OutputFormat::Xml,
679 None,
680 false, false, false,
683 false,
684 false,
685 );
686 let output = packer.pack(&[file_path]).expect("pack should succeed");
687
688 assert!(
691 !output.contains("<script>"),
692 "Bare <script> tag should not appear in XML output; expected escaped form"
693 );
694 assert!(
695 output.contains("<script>") || output.contains("&"),
696 "XML special characters in symbol names must be escaped"
697 );
698 }
699
700 #[test]
703 fn test_xml_path_attribute_special_chars_are_escaped() {
704 let dir = tempfile::TempDir::new().expect("failed to create temp dir");
705 let file_path = make_temp_file(&dir, "a&b.txt", "hello world\n");
707
708 let packer = Packer::new(
709 SqliteCache::new_in_memory().expect("failed to create test cache"),
710 Parser {},
711 OutputFormat::Xml,
712 None,
713 false,
714 false,
715 false,
716 false,
717 false,
718 );
719 let output = packer.pack(&[file_path]).expect("pack should succeed");
720
721 assert!(
723 !output.contains("path=\"") || !output.contains("a&b.txt\""),
724 "Bare & in path attribute must be escaped as &"
725 );
726 }
727
728 #[test]
731 fn test_xml_cdata_cdata_end_sequence_is_escaped() {
732 let dir = tempfile::TempDir::new().expect("failed to create temp dir");
733 let tricky = "let s = \"]]>\";\n";
735 let file_path = make_temp_file(&dir, "tricky.txt", tricky);
736
737 let packer = Packer::new(
738 SqliteCache::new_in_memory().expect("failed to create test cache"),
739 Parser {},
740 OutputFormat::Xml,
741 None,
742 false,
743 false,
744 false,
745 false,
746 false,
747 );
748 let output = packer.pack(&[file_path]).expect("pack should succeed");
749
750 let positions: Vec<_> = output.match_indices("]]>").collect();
756 for (idx, _) in &positions {
757 let after = &output[idx + 3..];
758 assert!(
759 after.starts_with("</content>"),
760 "Found ]]> at position {} that is not the CDATA closing sequence; \
761 raw content may break XML well-formedness",
762 idx
763 );
764 }
765 }
766
767 #[test]
770 fn test_xml_output_basic_well_formedness() {
771 let (_dir, file_path) = make_temp_rs_file("fn main() {}\n");
772
773 let packer = Packer::new(
774 SqliteCache::new_in_memory().expect("failed to create test cache"),
775 Parser {},
776 OutputFormat::Xml,
777 None,
778 false,
779 false,
780 false,
781 false,
782 false,
783 );
784 let output = packer.pack(&[file_path]).expect("pack should succeed");
785
786 assert!(
787 output.starts_with("<repository>"),
788 "XML output must start with <repository>"
789 );
790 assert!(
791 output.trim_end().ends_with("</repository>"),
792 "XML output must end with </repository>"
793 );
794
795 let cdata_re =
797 regex::Regex::new(r"(?s)<!\[CDATA\[.*?]]>").expect("failed to compile cdata regex");
798 let stripped = cdata_re.replace_all(&output, "");
799
800 for (i, ch) in stripped.char_indices() {
802 if ch == '<' {
803 let next = stripped[i + 1..].chars().next();
804 assert!(
805 matches!(next, Some('/' | '!' | '?' | 'a'..='z' | 'A'..='Z')),
806 "Bare < found at position {} outside of CDATA: ...{}...",
807 i,
808 &stripped[i.saturating_sub(10)..std::cmp::min(i + 20, stripped.len())]
809 );
810 }
811 }
812 }
813
814 #[test]
821 fn test_markdown_skeleton_map_indentation() {
822 use crate::cache::CacheStore;
823
824 let cache = SqliteCache::new_in_memory().expect("failed to create test cache");
825 cache.init().expect("failed to init cache schema");
826
827 let dir = tempfile::TempDir::new().expect("failed to create temp dir");
828 let file_path = make_temp_file(&dir, "lib.rs", "fn alpha() {}\n");
829
830 let file_id = cache
831 .upsert_file("lib.rs", "h2", b"fn alpha() {}")
832 .expect("upsert_file should succeed");
833 cache
834 .insert_symbol(&crate::cache::Symbol {
835 id: "s_alpha".to_string(),
836 file_id,
837 name: "alpha".to_string(),
838 kind: "function".to_string(),
839 byte_offset: 0,
840 byte_length: 13,
841 })
842 .expect("symbol insert should succeed");
843
844 let packer = Packer::with_workspace_root(
845 cache,
846 Parser {},
847 dir.path().to_path_buf(),
848 OutputFormat::Markdown,
849 None,
850 false,
851 true, false,
853 false,
854 false,
855 );
856 let output = packer.pack(&[file_path]).expect("pack should succeed");
857
858 assert!(
860 output.contains("- "),
861 "File bullet not found in Markdown output"
862 );
863
864 assert!(
866 output.contains(" - function alpha"),
867 "Symbol entries in skeleton map must be indented with two spaces; got:\n{}",
868 output
869 );
870 }
871
872 #[test]
875 fn test_markdown_symbol_names_with_special_chars() {
876 use crate::cache::CacheStore;
877
878 let cache = SqliteCache::new_in_memory().expect("failed to create test cache");
879 cache.init().expect("failed to init cache schema");
880
881 let dir = tempfile::TempDir::new().expect("failed to create temp dir");
882 let file_path = make_temp_file(&dir, "weird.rs", "fn weird() {}\n");
883
884 let file_id = cache
885 .upsert_file("weird.rs", "h3", b"fn weird() {}")
886 .expect("upsert_file should succeed");
887 cache
889 .insert_symbol(&crate::cache::Symbol {
890 id: "s_weird".to_string(),
891 file_id,
892 name: "*_[weird`_]*".to_string(),
893 kind: "function".to_string(),
894 byte_offset: 0,
895 byte_length: 13,
896 })
897 .expect("symbol insert should succeed");
898
899 let packer = Packer::with_workspace_root(
900 cache,
901 Parser {},
902 dir.path().to_path_buf(),
903 OutputFormat::Markdown,
904 None,
905 false,
906 true, false,
908 false,
909 false,
910 );
911 let output = packer.pack(&[file_path]).expect("pack should succeed");
912
913 assert!(output.contains("- "), "File bullet disappeared");
915
916 assert!(
918 output.contains("*_[weird`_]*"),
919 "Symbol name with Markdown special chars should appear verbatim"
920 );
921 }
922
923 #[test]
924 fn test_markdown_skeleton_map_uses_exact_relative_path_for_duplicate_basenames() {
925 use crate::cache::CacheStore;
926
927 let cache = SqliteCache::new_in_memory().expect("failed to create test cache");
928 cache.init().expect("failed to init cache schema");
929
930 let dir = tempfile::TempDir::new().expect("failed to create temp dir");
931 std::fs::create_dir_all(dir.path().join("src")).expect("create src dir");
932 std::fs::create_dir_all(dir.path().join("tests")).expect("create tests dir");
933
934 let src_path = make_temp_file(&dir, "src/lib.rs", "fn alpha() {}\n");
935 let tests_path = make_temp_file(&dir, "tests/lib.rs", "fn beta() {}\n");
936
937 let src_file_id = cache
938 .upsert_file("src/lib.rs", "h-src", b"fn alpha() {}")
939 .expect("upsert_file should succeed");
940 cache
941 .insert_symbol(&crate::cache::Symbol {
942 id: "src_alpha".to_string(),
943 file_id: src_file_id,
944 name: "alpha".to_string(),
945 kind: "function".to_string(),
946 byte_offset: 0,
947 byte_length: 13,
948 })
949 .expect("insert alpha symbol");
950
951 let tests_file_id = cache
952 .upsert_file("tests/lib.rs", "h-tests", b"fn beta() {}")
953 .expect("upsert_file should succeed");
954 cache
955 .insert_symbol(&crate::cache::Symbol {
956 id: "tests_beta".to_string(),
957 file_id: tests_file_id,
958 name: "beta".to_string(),
959 kind: "function".to_string(),
960 byte_offset: 0,
961 byte_length: 12,
962 })
963 .expect("insert beta symbol");
964
965 let packer = Packer::with_workspace_root(
966 cache,
967 Parser {},
968 dir.path().to_path_buf(),
969 OutputFormat::Markdown,
970 None,
971 false,
972 true,
973 false,
974 false,
975 false,
976 );
977 let output = packer
978 .pack(&[src_path.clone(), tests_path.clone()])
979 .expect("pack should succeed");
980
981 let expected_src = format!("- {}\n - function alpha", src_path.display());
982 let expected_tests = format!("- {}\n - function beta", tests_path.display());
983 assert!(
984 output.contains(&expected_src),
985 "src/lib.rs should retain its own symbols; got:\n{output}"
986 );
987 assert!(
988 output.contains(&expected_tests),
989 "tests/lib.rs should retain its own symbols; got:\n{output}"
990 );
991 }
992
993 #[test]
994 fn test_plugin_detection_uses_workspace_root_for_nested_files() {
995 struct RootMarkerPlugin;
996
997 impl ContextPlugin for RootMarkerPlugin {
998 fn name(&self) -> &str {
999 "root-marker"
1000 }
1001
1002 fn detect(&self, workspace_root: &Path) -> bool {
1003 workspace_root.join("manifest.json").exists()
1004 }
1005
1006 fn enrich(&self, _file_path: &Path, base_bones: &mut Vec<Bone>) -> Result<()> {
1007 for bone in base_bones.iter_mut() {
1008 bone.metadata
1009 .insert("root_detected".to_string(), "true".to_string());
1010 }
1011 Ok(())
1012 }
1013 }
1014
1015 let dir = tempfile::TempDir::new().expect("failed to create temp dir");
1016 std::fs::write(dir.path().join("manifest.json"), "{}").expect("write root marker");
1017 let nested = make_temp_file(&dir, "src/lib.rs", "fn nested() {}\n");
1018
1019 let mut packer = Packer::with_workspace_root(
1020 SqliteCache::new_in_memory().expect("failed to create test cache"),
1021 Parser {},
1022 dir.path().to_path_buf(),
1023 OutputFormat::Xml,
1024 None,
1025 false,
1026 false,
1027 false,
1028 false,
1029 false,
1030 );
1031 packer.register_plugin(Box::new(RootMarkerPlugin));
1032
1033 let output = packer.pack(&[nested]).expect("pack should succeed");
1034 assert!(
1035 output.contains("root_detected"),
1036 "plugin detect() should run against workspace root and enrich nested files"
1037 );
1038 }
1039
1040 #[test]
1046 fn test_token_governor_generous_budget_includes_content() {
1047 let (_dir, file_path) = make_temp_rs_file("fn main() { let x = 42; }\n");
1048
1049 let packer = Packer::new(
1050 SqliteCache::new_in_memory().expect("failed to create test cache"),
1051 Parser {},
1052 OutputFormat::Xml,
1053 Some(100_000), false,
1055 false,
1056 false,
1057 false,
1058 false,
1059 );
1060 let output = packer.pack(&[file_path]).expect("pack should succeed");
1061
1062 assert!(
1064 output.contains("<content><![CDATA["),
1065 "Expected <content> block when budget is generous; got:\n{}",
1066 output
1067 );
1068 }
1069
1070 #[test]
1072 fn test_token_governor_one_token_budget_omits_content() {
1073 let (_dir, file_path) = make_temp_rs_file("fn main() { let x = 42; }\n");
1074
1075 let packer = Packer::new(
1076 SqliteCache::new_in_memory().expect("failed to create test cache"),
1077 Parser {},
1078 OutputFormat::Xml,
1079 Some(1), false,
1081 false,
1082 false,
1083 false,
1084 false,
1085 );
1086 let result = packer.pack(&[file_path]);
1087
1088 assert!(result.is_ok(), "pack() must not error under tight budget");
1090 let output = result.expect("pack should succeed");
1091
1092 assert!(
1094 !output.contains("<content>"),
1095 "No <content> block expected when budget is 1 token"
1096 );
1097 }
1098
1099 #[test]
1101 fn test_token_governor_graceful_degradation_no_panic() {
1102 let (_dir, file_path) =
1103 make_temp_rs_file("fn a() { 1 }\nfn b() { 2 }\nfn c() { 3 }\nfn d() { 4 }\n");
1104
1105 for budget in [0usize, 1, 5, 50] {
1106 let packer = Packer::new(
1107 SqliteCache::new_in_memory().expect("failed to create test cache"),
1108 Parser {},
1109 OutputFormat::Xml,
1110 Some(budget),
1111 false,
1112 false,
1113 false,
1114 false,
1115 false,
1116 );
1117 let result = packer.pack(std::slice::from_ref(&file_path));
1118 assert!(
1119 result.is_ok(),
1120 "pack() panicked or errored at max_tokens={}",
1121 budget
1122 );
1123 }
1124 }
1125
1126 #[test]
1133 fn test_no_files_and_no_file_summary_together() {
1134 let (_dir, file_path) = make_temp_rs_file("fn main() {}\n");
1135
1136 let packer = Packer::new(
1137 SqliteCache::new_in_memory().expect("failed to create test cache"),
1138 Parser {},
1139 OutputFormat::Xml,
1140 None,
1141 true, true, false,
1144 false,
1145 false,
1146 );
1147 let output = packer.pack(&[file_path]).expect("pack should succeed");
1148
1149 let trimmed = output.trim();
1151 assert_eq!(
1152 trimmed, "<repository>\n</repository>",
1153 "With both no_files and no_file_summary, output should be just the repository tags; got:\n{}",
1154 trimmed
1155 );
1156 }
1157
1158 #[test]
1160 fn test_remove_line_comments_from_rust() {
1161 let dir = tempfile::TempDir::new().expect("failed to create temp dir");
1162 let file_path = make_temp_file(
1164 &dir,
1165 "comments.txt",
1166 "let x = 1; // this is a comment\nlet y = 2;\n",
1167 );
1168
1169 let packer = Packer::new(
1170 SqliteCache::new_in_memory().expect("failed to create test cache"),
1171 Parser {},
1172 OutputFormat::Xml,
1173 None,
1174 false,
1175 false,
1176 true, false,
1178 false,
1179 );
1180 let output = packer.pack(&[file_path]).expect("pack should succeed");
1181
1182 assert!(
1183 !output.contains("// this is a comment"),
1184 "Line comment should be stripped; got:\n{}",
1185 output
1186 );
1187 assert!(
1188 output.contains("let x = 1;"),
1189 "Non-comment code should remain after stripping line comments"
1190 );
1191 }
1192
1193 #[test]
1195 fn test_remove_block_comments() {
1196 let dir = tempfile::TempDir::new().expect("failed to create temp dir");
1197 let file_path = make_temp_file(
1198 &dir,
1199 "block_comments.txt",
1200 "int x = /* inline block */ 42;\n/* multi\nline\ncomment */\nint y = 1;\n",
1201 );
1202
1203 let packer = Packer::new(
1204 SqliteCache::new_in_memory().expect("failed to create test cache"),
1205 Parser {},
1206 OutputFormat::Xml,
1207 None,
1208 false,
1209 false,
1210 true, false,
1212 false,
1213 );
1214 let output = packer.pack(&[file_path]).expect("pack should succeed");
1215
1216 assert!(
1217 !output.contains("inline block"),
1218 "Inline block comment should be stripped"
1219 );
1220 assert!(
1221 !output.contains("multi\nline\ncomment"),
1222 "Multi-line block comment should be stripped"
1223 );
1224 assert!(
1225 output.contains("int x ="),
1226 "Code outside block comment should be preserved"
1227 );
1228 }
1229
1230 #[test]
1233 fn test_remove_empty_lines_collapses_blanks() {
1234 let dir = tempfile::TempDir::new().expect("failed to create temp dir");
1235 let file_path = make_temp_file(
1236 &dir,
1237 "blanks.txt",
1238 "line one\n\n\n\nline two\n\n\nline three\n",
1239 );
1240
1241 let packer = Packer::new(
1242 SqliteCache::new_in_memory().expect("failed to create test cache"),
1243 Parser {},
1244 OutputFormat::Xml,
1245 None,
1246 false,
1247 false,
1248 false,
1249 true, false,
1251 );
1252 let output = packer.pack(&[file_path]).expect("pack should succeed");
1253
1254 assert!(
1256 !output.contains("\n\n\n"),
1257 "Multiple consecutive blank lines should be collapsed to a single newline; got:\n{}",
1258 output
1259 );
1260 assert!(
1261 output.contains("line one"),
1262 "Non-blank lines must be preserved"
1263 );
1264 assert!(
1265 output.contains("line two"),
1266 "Non-blank lines must be preserved"
1267 );
1268 }
1269
1270 #[test]
1273 fn test_truncate_base64_replaces_long_strings() {
1274 let dir = tempfile::TempDir::new().expect("failed to create temp dir");
1275 let long_token = "A".repeat(100);
1277 let content = format!("key = {}\n", long_token);
1278 let file_path = make_temp_file(&dir, "tokens.txt", &content);
1279
1280 let packer = Packer::new(
1281 SqliteCache::new_in_memory().expect("failed to create test cache"),
1282 Parser {},
1283 OutputFormat::Xml,
1284 None,
1285 false,
1286 false,
1287 false,
1288 false,
1289 true, );
1291 let output = packer.pack(&[file_path]).expect("pack should succeed");
1292
1293 assert!(
1294 output.contains("[TRUNCATED_BASE64]"),
1295 "A 100-char alphanumeric string should be replaced with [TRUNCATED_BASE64]"
1296 );
1297 assert!(
1298 !output.contains(&long_token),
1299 "The original long token must not appear in output after truncation"
1300 );
1301 }
1302
#[test]
/// A 99-character run sits just below the truncation length exercised by
/// the sibling test, so it must pass through untouched and no marker may be
/// emitted.
fn test_truncate_base64_preserves_short_strings() {
    let tmp = tempfile::TempDir::new().expect("failed to create temp dir");
    let token = "B".repeat(99);
    let source_path = make_temp_file(&tmp, "short_tokens.txt", &format!("key = {token}\n"));

    let cache = SqliteCache::new_in_memory().expect("failed to create test cache");
    // Same configuration as the long-token test: only the last flag is on.
    let packer = Packer::new(
        cache,
        Parser {},
        OutputFormat::Xml,
        None,
        false,
        false,
        false,
        false,
        true,
    );
    let packed = packer.pack(&[source_path]).expect("pack should succeed");

    let token_kept = packed.contains(&token);
    assert!(token_kept, "A 99-char string must NOT be truncated");

    let marker_present = packed.contains("[TRUNCATED_BASE64]");
    assert!(
        !marker_present,
        "No truncation should occur for strings under 100 chars"
    );
}
1334
#[test]
/// Packing three files in one call must mention every file name somewhere
/// in the output.
fn test_three_files_all_appear_in_skeleton_map() {
    let tmp = tempfile::TempDir::new().expect("failed to create temp dir");
    // Array elements are created left to right, matching the original
    // one-two-three creation order.
    let inputs = [
        make_temp_file(&tmp, "one.txt", "content one\n"),
        make_temp_file(&tmp, "two.txt", "content two\n"),
        make_temp_file(&tmp, "three.txt", "content three\n"),
    ];

    let cache = SqliteCache::new_in_memory().expect("failed to create test cache");
    // All optional behaviours disabled: plain XML output.
    let packer = Packer::new(
        cache,
        Parser {},
        OutputFormat::Xml,
        None,
        false,
        false,
        false,
        false,
        false,
    );
    let packed = packer.pack(&inputs).expect("pack should succeed");

    let has_one = packed.contains("one.txt");
    assert!(has_one, "one.txt missing from output");

    let has_two = packed.contains("two.txt");
    assert!(has_two, "two.txt missing from output");

    let has_three = packed.contains("three.txt");
    assert!(has_three, "three.txt missing from output");
}
1367
#[test]
/// The first mention of each file name in the output must follow the order
/// in which the paths were handed to `pack`.
fn test_skeleton_map_preserves_input_order() {
    let tmp = tempfile::TempDir::new().expect("failed to create temp dir");
    let inputs = [
        make_temp_file(&tmp, "alpha.txt", "alpha\n"),
        make_temp_file(&tmp, "beta.txt", "beta\n"),
        make_temp_file(&tmp, "gamma.txt", "gamma\n"),
    ];

    let cache = SqliteCache::new_in_memory().expect("failed to create test cache");
    // All optional behaviours disabled: plain XML output.
    let packer = Packer::new(
        cache,
        Parser {},
        OutputFormat::Xml,
        None,
        false,
        false,
        false,
        false,
        false,
    );
    let packed = packer.pack(&inputs).expect("pack should succeed");

    // Byte offsets of the first occurrence of each name.
    let first = packed.find("alpha.txt").expect("alpha.txt not found");
    let second = packed.find("beta.txt").expect("beta.txt not found");
    let third = packed.find("gamma.txt").expect("gamma.txt not found");

    let strictly_ascending = first < second && second < third;
    assert!(
        strictly_ascending,
        "Files must appear in the skeleton map in the order they were supplied"
    );
}
1399
#[test]
/// A path that no longer exists on disk when `pack` runs must not make the
/// call fail: the result must be `Ok`, the `<repository>` wrapper must still
/// be emitted, and none of the deleted file's content may appear.
fn test_deleted_file_is_gracefully_skipped() {
    let dir = tempfile::TempDir::new().expect("failed to create temp dir");
    let file_path = make_temp_file(&dir, "ephemeral.txt", "will be deleted\n");

    // Remove the file before packing so the packer sees a dangling path.
    std::fs::remove_file(&file_path).expect("failed to delete ephemeral file");

    let packer = Packer::new(
        SqliteCache::new_in_memory().expect("failed to create test cache"),
        Parser {},
        OutputFormat::Xml,
        None,
        false,
        false,
        false,
        false,
        false,
    );

    // One check instead of `is_ok()` followed by `expect()`: on failure the
    // underlying error is reported directly.
    let output = match packer.pack(&[file_path]) {
        Ok(output) => output,
        Err(err) => {
            panic!("pack() must not return Err when a file has been deleted; got: {err:?}")
        }
    };

    // The opening tag is only required to be present (something may precede
    // it), so the message describes exactly what is checked.
    assert!(
        output.contains("<repository>"),
        "Output must contain the opening <repository> tag"
    );
    assert!(
        output.trim_end().ends_with("</repository>"),
        "Output must end with </repository>"
    );
    assert!(
        !output.contains("will be deleted"),
        "Content of deleted file must not appear in output"
    );
}
1450
#[test]
/// Metadata injected by a plugin must be XML-escaped before it reaches the
/// output, so a hostile key or value cannot introduce new elements or close
/// existing ones.
fn test_plugin_metadata_xml_escaping() {
    /// Plugin that attaches a metadata entry full of XML-significant
    /// characters to every bone it is given.
    struct XmlDangerousPlugin;

    impl ContextPlugin for XmlDangerousPlugin {
        fn name(&self) -> &str {
            "xml_dangerous"
        }

        fn detect(&self, _directory: &Path) -> bool {
            true
        }

        fn enrich(&self, _file_path: &Path, base_bones: &mut Vec<Bone>) -> Result<()> {
            for bone in base_bones.iter_mut() {
                bone.metadata.insert(
                    // The key carries `<`, `>`, `&` and `"`.
                    "key<with>&\"special".to_string(),
                    // The value tries to close the surrounding element and
                    // smuggle in a `<malicious>` element.
                    "</metadata><malicious>payload</malicious><metadata key=\"x\">".to_string(),
                );
            }
            Ok(())
        }
    }

    let (_dir, file_path) = make_temp_rs_file("fn main() {}\n");
    let mut packer = Packer::new(
        SqliteCache::new_in_memory().expect("failed to create test cache"),
        Parser {},
        OutputFormat::Xml,
        None,
        false,
        false,
        false,
        false,
        false,
    );
    packer.register_plugin(Box::new(XmlDangerousPlugin));

    let output = packer.pack(&[file_path]).expect("pack should succeed");

    // The injected element must never appear as real markup.
    assert!(
        !output.contains("<malicious>"),
        "Bare <malicious> tag found in output — metadata value was not XML-escaped; got:\n{}",
        output
    );
    assert!(
        !output.contains("</malicious>"),
        "Bare </malicious> tag found in output — metadata value was not XML-escaped; got:\n{}",
        output
    );

    // Look for the escaped entity forms. Checking for raw `<`, `>` or `&`
    // would be true for any XML document and so could not detect missing
    // escaping.
    assert!(
        output.contains("&lt;") || output.contains("&gt;") || output.contains("&amp;"),
        "Expected XML-escaped entities (&lt;, &gt;, or &amp;) in metadata output; got:\n{}",
        output
    );

    // The document wrapper must survive the injection attempt.
    assert!(
        output.contains("</repository>"),
        "Output must still contain </repository> after metadata injection; got:\n{}",
        output
    );
}
1530}