Skip to main content

codebones_core/
plugin.rs

1use crate::cache::{CacheStore, SqliteCache};
2use crate::parser::Bone;
3use crate::parser::Parser;
4use anyhow::Result;
5use std::path::{Path, PathBuf};
6use std::sync::OnceLock;
7
8// Regex::new is called inside OnceLock::get_or_init, which guarantees compilation at most once.
9// Clippy cannot see through the OnceLock abstraction and fires regex_creation_in_loops.
10#[allow(clippy::regex_creation_in_loops)]
11static RE_EMPTY_LINES: OnceLock<regex::Regex> = OnceLock::new();
12#[allow(clippy::regex_creation_in_loops)]
13static RE_BASE64: OnceLock<regex::Regex> = OnceLock::new();
14#[allow(clippy::regex_creation_in_loops)]
15static RE_LINE_COMMENT: OnceLock<regex::Regex> = OnceLock::new();
16#[allow(clippy::regex_creation_in_loops)]
17static RE_BLOCK_COMMENT: OnceLock<regex::Regex> = OnceLock::new();
18
19/// A plugin that can enrich extracted code bones with domain-specific metadata.
20pub trait ContextPlugin: Send + Sync {
21    /// The unique name of the plugin (e.g., "dbt", "openapi").
22    fn name(&self) -> &str;
23
24    /// Returns true if this plugin should be active for the given directory/workspace.
25    fn detect(&self, directory: &Path) -> bool;
26
27    /// Enriches the extracted bones for a specific file with additional metadata.
28    /// The plugin can modify the `base_bones` in place (e.g., adding JSON metadata).
29    fn enrich(&self, file_path: &Path, base_bones: &mut Vec<Bone>) -> Result<()>;
30}
31
/// Supported output formats for the packed context.
///
/// The enum is a plain tag with no payload, so it derives the usual value-type
/// traits: it can be printed with `{:?}`, copied freely, and compared in tests.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OutputFormat {
    /// Emit an XML document rooted at `<repository>`.
    Xml,
    /// Emit Markdown with `##` headings and fenced code blocks.
    Markdown,
}
37
38impl OutputFormat {
39    pub fn parse(format: &str) -> Result<Self> {
40        match format.to_lowercase().as_str() {
41            "xml" => Ok(Self::Xml),
42            "markdown" => Ok(Self::Markdown),
43            other => anyhow::bail!("Invalid output format: {other}. Expected 'xml' or 'markdown'"),
44        }
45    }
46}
47
48/// Bundles files and their enriched bones into an AI-friendly output format.
49pub struct Packer {
50    cache: SqliteCache,
51    parser: Parser,
52    workspace_root: PathBuf,
53    plugins: Vec<Box<dyn ContextPlugin>>,
54    format: OutputFormat,
55    max_tokens: Option<usize>,
56    no_file_summary: bool,
57    no_files: bool,
58    remove_comments: bool,
59    remove_empty_lines: bool,
60    truncate_base64: bool,
61}
62
63impl Packer {
64    fn xml_escape(s: &str) -> String {
65        s.replace('&', "&amp;")
66            .replace('<', "&lt;")
67            .replace('>', "&gt;")
68            .replace('"', "&quot;")
69            .replace('\'', "&apos;")
70    }
71
72    fn xml_escape_cdata(s: &str) -> String {
73        // Split ]]> into ]]]]><![CDATA[> to keep it inside CDATA
74        s.replace("]]>", "]]]]><![CDATA[>")
75    }
76
77    /// Creates a new Packer instance.
78    #[allow(clippy::too_many_arguments)]
79    pub fn new(
80        cache: SqliteCache,
81        parser: Parser,
82        format: OutputFormat,
83        max_tokens: Option<usize>,
84        no_file_summary: bool,
85        no_files: bool,
86        remove_comments: bool,
87        remove_empty_lines: bool,
88        truncate_base64: bool,
89    ) -> Self {
90        Self::with_workspace_root(
91            cache,
92            parser,
93            PathBuf::from("."),
94            format,
95            max_tokens,
96            no_file_summary,
97            no_files,
98            remove_comments,
99            remove_empty_lines,
100            truncate_base64,
101        )
102    }
103
104    #[allow(clippy::too_many_arguments)]
105    pub fn with_workspace_root(
106        cache: SqliteCache,
107        parser: Parser,
108        workspace_root: PathBuf,
109        format: OutputFormat,
110        max_tokens: Option<usize>,
111        no_file_summary: bool,
112        no_files: bool,
113        remove_comments: bool,
114        remove_empty_lines: bool,
115        truncate_base64: bool,
116    ) -> Self {
117        let _ = cache.init();
118        Self {
119            cache,
120            parser,
121            workspace_root,
122            plugins: Vec::new(),
123            format,
124            max_tokens,
125            no_file_summary,
126            no_files,
127            remove_comments,
128            remove_empty_lines,
129            truncate_base64,
130        }
131    }
132
133    /// Registers a context plugin.
134    pub fn register_plugin(&mut self, plugin: Box<dyn ContextPlugin>) {
135        self.plugins.push(plugin);
136    }
137
138    /// Packs the specified files into a single formatted string.
139    // OnceLock::get_or_init guarantees each regex is compiled at most once.
140    // Clippy fires regex_creation_in_loops because it cannot see through the OnceLock
141    // abstraction — the allow is intentional and correct.
142    #[allow(clippy::regex_creation_in_loops)]
143    pub fn pack(&self, file_paths: &[PathBuf]) -> Result<String> {
144        let _ = &self.parser;
145
146        let mut output = String::new();
147        let active_plugins: Vec<&dyn ContextPlugin> = self
148            .plugins
149            .iter()
150            .filter(|plugin| plugin.detect(&self.workspace_root))
151            .map(|plugin| plugin.as_ref())
152            .collect();
153
154        match self.format {
155            OutputFormat::Xml => output.push_str("<repository>\n"),
156            OutputFormat::Markdown => {}
157        }
158
159        let lookup_symbols = |path: &PathBuf| -> Result<Vec<(String, String)>> {
160            let relative_path = path
161                .strip_prefix(&self.workspace_root)
162                .unwrap_or(path)
163                .to_string_lossy()
164                .to_string();
165            self.cache
166                .get_file_symbols(&relative_path)
167                .map_err(Into::into)
168        };
169
170        // Generate Skeleton Map
171        if !self.no_file_summary {
172            match self.format {
173                OutputFormat::Xml => {
174                    output.push_str("  <skeleton_map>\n");
175                    for path in file_paths {
176                        output.push_str(&format!(
177                            "    <file path=\"{}\">\n",
178                            Self::xml_escape(&path.display().to_string())
179                        ));
180                        for (kind, name) in lookup_symbols(path)? {
181                            output.push_str(&format!(
182                                "      <signature>{} {}</signature>\n",
183                                Self::xml_escape(&kind),
184                                Self::xml_escape(&name)
185                            ));
186                        }
187                        output.push_str("    </file>\n");
188                    }
189                    output.push_str("  </skeleton_map>\n");
190                }
191                OutputFormat::Markdown => {
192                    output.push_str("## Skeleton Map\n\n");
193                    for path in file_paths {
194                        output.push_str(&format!("- {}\n", path.display()));
195                        for (kind, name) in lookup_symbols(path)? {
196                            output.push_str(&format!("  - {} {}\n", kind, name));
197                        }
198                    }
199                    output.push('\n');
200                }
201            }
202        }
203
204        if self.no_files {
205            if let OutputFormat::Xml = self.format {
206                output.push_str("</repository>\n");
207            }
208            return Ok(output);
209        }
210
211        let bpe = tiktoken_rs::cl100k_base()
212            .map_err(|e| anyhow::anyhow!("Failed to initialize tokenizer: {}", e))?;
213        let mut degrade_to_bones = false;
214
215        for path in file_paths {
216            let mut raw_content = match std::fs::read_to_string(path) {
217                Ok(s) => s,
218                Err(e) => {
219                    eprintln!(
220                        "Warning: skipping unreadable file {}: {}",
221                        path.display(),
222                        e
223                    );
224                    continue;
225                }
226            };
227
228            if self.remove_empty_lines {
229                raw_content = RE_EMPTY_LINES
230                    .get_or_init(|| {
231                        regex::Regex::new(r"\n\s*\n").expect("valid static regex: empty lines")
232                    })
233                    .replace_all(&raw_content, "\n")
234                    .to_string();
235            }
236
237            if self.truncate_base64 {
238                // Truncate long hex or base64 looking strings (length > 100)
239                raw_content = RE_BASE64
240                    .get_or_init(|| {
241                        regex::Regex::new(r"[A-Za-z0-9+/=]{100,}")
242                            .expect("valid static regex: base64")
243                    })
244                    .replace_all(&raw_content, "[TRUNCATED_BASE64]")
245                    .to_string();
246            }
247
248            // Generate the skeleton by eliding function/class bodies
249            let content = {
250                let ext = path.extension().unwrap_or_default().to_string_lossy();
251                if let Some(spec) = crate::parser::get_spec_for_extension(&ext) {
252                    let doc = crate::parser::parse_file(&raw_content, &spec);
253                    let mut result = String::new();
254                    let mut last_end = 0;
255
256                    let mut indices: Vec<usize> = (0..doc.symbols.len()).collect();
257                    indices.sort_by_key(|&i| doc.symbols[i].full_range.start);
258
259                    for i in &indices {
260                        let sym = &doc.symbols[*i];
261                        if let Some(body_range) = &sym.body_range {
262                            if body_range.start >= last_end {
263                                result.push_str(&raw_content[last_end..body_range.start]);
264                                result.push_str("...");
265                                last_end = body_range.end;
266                            }
267                        }
268                    }
269                    result.push_str(&raw_content[last_end..]);
270
271                    if self.remove_comments {
272                        // Simple regex fallback for comments (C-style, Python, HTML)
273                        result = RE_BLOCK_COMMENT
274                            .get_or_init(|| {
275                                regex::Regex::new(r"(?s)/\*.*?\*/|<!--.*?-->")
276                                    .expect("valid static regex: block comment")
277                            })
278                            .replace_all(&result, "")
279                            .to_string();
280                        result = RE_LINE_COMMENT
281                            .get_or_init(|| {
282                                regex::Regex::new(r"(?m)(//|#).*\n")
283                                    .expect("valid static regex: line comment")
284                            })
285                            .replace_all(&result, "\n")
286                            .to_string();
287                    }
288
289                    result
290                } else {
291                    if self.remove_comments {
292                        let no_blocks = RE_BLOCK_COMMENT
293                            .get_or_init(|| {
294                                regex::Regex::new(r"(?s)/\*.*?\*/|<!--.*?-->")
295                                    .expect("valid static regex: block comment")
296                            })
297                            .replace_all(&raw_content, "")
298                            .to_string();
299                        RE_LINE_COMMENT
300                            .get_or_init(|| {
301                                regex::Regex::new(r"(?m)(//|#).*\n")
302                                    .expect("valid static regex: line comment")
303                            })
304                            .replace_all(&no_blocks, "\n")
305                            .to_string()
306                    } else {
307                        raw_content.clone() // Fallback to raw content if language isn't supported
308                    }
309                }
310            };
311
312            let mut bones = vec![Bone::default()];
313
314            for plugin in &active_plugins {
315                plugin.enrich(path, &mut bones)?;
316            }
317
318            if !degrade_to_bones {
319                if let Some(max) = self.max_tokens {
320                    let current_tokens = bpe.encode_with_special_tokens(&output).len();
321                    let content_tokens = bpe.encode_with_special_tokens(&content).len();
322                    if current_tokens + content_tokens > max {
323                        degrade_to_bones = true;
324                    }
325                }
326            }
327
328            match self.format {
329                OutputFormat::Xml => {
330                    output.push_str(&format!(
331                        "  <file path=\"{}\">\n",
332                        Self::xml_escape(&path.display().to_string())
333                    ));
334                    if !degrade_to_bones {
335                        let safe_content = Self::xml_escape_cdata(&content);
336                        if safe_content == content {
337                            output.push_str(&format!(
338                                "    <content><![CDATA[\n{}\n]]></content>\n",
339                                safe_content
340                            ));
341                        } else {
342                            // Content contains ]]> which cannot be safely embedded in CDATA;
343                            // fall back to XML entity escaping so the document stays well-formed.
344                            output.push_str(&format!(
345                                "    <content>{}</content>\n",
346                                Self::xml_escape(&content)
347                            ));
348                        }
349                    }
350                    // Only print bones block if plugins added metadata
351                    let has_metadata = bones.iter().any(|b| !b.metadata.is_empty());
352                    if has_metadata {
353                        output.push_str("    <bones>\n");
354                        for bone in &bones {
355                            for (k, v) in &bone.metadata {
356                                output.push_str(&format!(
357                                    "      <metadata key=\"{}\">{}</metadata>\n",
358                                    Self::xml_escape(k),
359                                    Self::xml_escape(v)
360                                ));
361                            }
362                        }
363                        output.push_str("    </bones>\n");
364                    }
365                    output.push_str("  </file>\n");
366                }
367                OutputFormat::Markdown => {
368                    output.push_str(&format!("## {}\n\n", path.display()));
369                    if !degrade_to_bones {
370                        // Find longest run of backticks in content and use one more as the fence
371                        // delimiter (CommonMark spec approach) to prevent fence injection.
372                        let max_backticks = {
373                            let mut max = 0usize;
374                            let mut cur = 0usize;
375                            for c in content.chars() {
376                                if c == '`' {
377                                    cur += 1;
378                                    max = max.max(cur);
379                                } else {
380                                    cur = 0;
381                                }
382                            }
383                            max
384                        };
385                        let fence_len = max_backticks.max(2) + 1;
386                        let fence = "`".repeat(fence_len);
387                        // Break up any backtick run of length >= (fence_len - 1) within the
388                        // content to prevent a closing-fence sequence from appearing verbatim.
389                        // A zero-width space (U+200B) is inserted after the (fence_len-1)-th
390                        // consecutive backtick so the run is interrupted while the characters
391                        // remain visible to the reader.
392                        let safe_content = if max_backticks >= fence_len - 1 {
393                            let threshold = fence_len - 1;
394                            let mut result = String::with_capacity(content.len());
395                            let mut run = 0usize;
396                            for c in content.chars() {
397                                result.push(c);
398                                if c == '`' {
399                                    run += 1;
400                                    if run == threshold {
401                                        result.push('\u{200B}'); // zero-width space
402                                        run = 0;
403                                    }
404                                } else {
405                                    run = 0;
406                                }
407                            }
408                            result
409                        } else {
410                            content.clone()
411                        };
412                        output.push_str(&format!("{}\n{}\n{}\n\n", fence, safe_content, fence));
413                    }
414                    // Only print Bones section if plugins added metadata
415                    let has_metadata = bones.iter().any(|b| !b.metadata.is_empty());
416                    if has_metadata {
417                        output.push_str("Bones:\n");
418                        for bone in &bones {
419                            for (k, v) in &bone.metadata {
420                                output.push_str(&format!("- {}: {}\n", k, v));
421                            }
422                        }
423                        output.push('\n');
424                    }
425                }
426            }
427        }
428
429        if let OutputFormat::Xml = self.format {
430            output.push_str("</repository>\n");
431        }
432
433        Ok(output)
434    }
435}
436
437#[cfg(test)]
438mod tests {
439    use super::*;
440    use std::io::Write;
441
442    struct MockPlugin;
443
444    impl ContextPlugin for MockPlugin {
445        fn name(&self) -> &str {
446            "mock"
447        }
448
449        fn detect(&self, _directory: &Path) -> bool {
450            true
451        }
452
453        fn enrich(&self, _file_path: &Path, base_bones: &mut Vec<Bone>) -> Result<()> {
454            for bone in base_bones.iter_mut() {
455                bone.metadata
456                    .insert("injected".to_string(), "true".to_string());
457            }
458            Ok(())
459        }
460    }
461
462    fn make_temp_rs_file(content: &str) -> (tempfile::TempDir, PathBuf) {
463        let dir = tempfile::TempDir::new().expect("failed to create temp dir");
464        let file_path = dir.path().join("sample.rs");
465        let mut f = std::fs::File::create(&file_path).expect("failed to create temp file");
466        f.write_all(content.as_bytes())
467            .expect("failed to write file content");
468        (dir, file_path)
469    }
470
471    #[test]
472    fn test_plugin_detect_and_enrich() {
473        let plugin = MockPlugin;
474        assert!(plugin.detect(Path::new(".")));
475        let mut bones = vec![Bone::default()];
476        plugin
477            .enrich(Path::new("any_file.rs"), &mut bones)
478            .expect("enrich should succeed");
479        assert_eq!(
480            bones[0]
481                .metadata
482                .get("injected")
483                .expect("injected key must be present"),
484            "true"
485        );
486    }
487
488    #[test]
489    fn test_packer_xml_format() {
490        let (_dir, file_path) = make_temp_rs_file("fn main() {}\n");
491        let packer = Packer::new(
492            SqliteCache::new_in_memory().expect("failed to create test cache"),
493            Parser {},
494            OutputFormat::Xml,
495            None,
496            false,
497            false,
498            false,
499            false,
500            false,
501        );
502        let result = packer.pack(&[file_path]);
503        assert!(result.is_ok());
504        let output = result.expect("pack should succeed");
505        assert!(output.contains("<repository>"));
506    }
507
508    #[test]
509    fn test_packer_markdown_format() {
510        let (_dir, file_path) = make_temp_rs_file("fn main() {}\n");
511        let packer = Packer::new(
512            SqliteCache::new_in_memory().expect("failed to create test cache"),
513            Parser {},
514            OutputFormat::Markdown,
515            None,
516            false,
517            false,
518            false,
519            false,
520            false,
521        );
522        let result = packer.pack(std::slice::from_ref(&file_path));
523        assert!(result.is_ok());
524        let output = result.expect("pack should succeed");
525        assert!(output.contains(&format!("## {}", file_path.display())));
526    }
527
528    #[test]
529    fn test_packer_with_plugins() {
530        let (_dir, file_path) = make_temp_rs_file("fn main() {}\n");
531        let mut packer = Packer::new(
532            SqliteCache::new_in_memory().expect("failed to create test cache"),
533            Parser {},
534            OutputFormat::Xml,
535            None,
536            false,
537            false,
538            false,
539            false,
540            false,
541        );
542        packer.register_plugin(Box::new(MockPlugin));
543        let result = packer.pack(&[file_path]);
544        assert!(result.is_ok());
545        let output = result.expect("pack should succeed");
546        assert!(output.contains("injected"));
547    }
548
549    #[test]
550    fn test_packer_empty_file_list() {
551        let packer = Packer::new(
552            SqliteCache::new_in_memory().expect("failed to create test cache"),
553            Parser {},
554            OutputFormat::Xml,
555            None,
556            false,
557            false,
558            false,
559            false,
560            false,
561        );
562        let result = packer.pack(&[]);
563        assert!(result.is_ok());
564    }
565
566    #[test]
567    fn test_packer_missing_file() {
568        let packer = Packer::new(
569            SqliteCache::new_in_memory().expect("failed to create test cache"),
570            Parser {},
571            OutputFormat::Xml,
572            None,
573            false,
574            false,
575            false,
576            false,
577            false,
578        );
579        let result = packer.pack(&[PathBuf::from("this_file_does_not_exist_xyz.rs")]);
580        // Missing files are skipped gracefully
581        assert!(result.is_ok());
582    }
583
584    #[test]
585    fn test_packer_generates_skeleton_map_at_top() {
586        let (_dir, file_path) = make_temp_rs_file("fn main() {}\n");
587        let packer = Packer::new(
588            SqliteCache::new_in_memory().expect("failed to create test cache"),
589            Parser {},
590            OutputFormat::Xml,
591            None,
592            false,
593            false,
594            false,
595            false,
596            false,
597        );
598        let result = packer.pack(&[file_path]);
599        assert!(result.is_ok());
600        let output = result.expect("pack should succeed");
601        // The skeleton map should be at the top of the output
602        assert!(output.starts_with("<repository>\n  <skeleton_map>"));
603    }
604
605    #[test]
606    fn test_packer_token_governor_degrades_to_bones() {
607        // Set a very low max_tokens to force degradation to bones-only output
608        let (_dir, file_path) = make_temp_rs_file("fn main() { let x = 1; }\n");
609        let packer = Packer::new(
610            SqliteCache::new_in_memory().expect("failed to create test cache"),
611            Parser {},
612            OutputFormat::Xml,
613            Some(10),
614            false,
615            false,
616            false,
617            false,
618            false,
619        );
620        let result = packer.pack(&[file_path]);
621        assert!(result.is_ok());
622        let output = result.expect("pack should succeed");
623        // When degraded to bones, full file content should not appear in output
624        assert!(!output.contains("<content>"));
625    }
626
627    // -------------------------------------------------------------------------
628    // Helper: create a temp file with a given extension
629    // -------------------------------------------------------------------------
630    fn make_temp_file(dir: &tempfile::TempDir, filename: &str, content: &str) -> PathBuf {
631        let file_path = dir.path().join(filename);
632        if let Some(parent) = file_path.parent() {
633            std::fs::create_dir_all(parent).expect("failed to create parent directories");
634        }
635        let mut f = std::fs::File::create(&file_path).expect("failed to create temp file");
636        f.write_all(content.as_bytes())
637            .expect("failed to write file content");
638        file_path
639    }
640
641    // =========================================================================
642    // XML output correctness
643    // =========================================================================
644
645    /// Symbol names with XML special characters should be escaped in XML output.
646    /// This test describes CORRECT behavior. The current implementation does NOT
647    /// escape these characters in <signature> tags — so this test is expected to
648    /// FAIL until the implementation is fixed.
649    #[test]
650    fn test_xml_signature_special_chars_are_escaped() {
651        use crate::cache::CacheStore;
652
653        let cache = SqliteCache::new_in_memory().expect("failed to create test cache");
654        cache.init().expect("failed to init cache schema");
655
656        // Insert a file + symbol with XML-dangerous characters in the name.
657        let file_id = cache
658            .upsert_file("bad.rs", "h1", b"fn bad() {}")
659            .expect("upsert_file should succeed");
660        cache
661            .insert_symbol(&crate::cache::Symbol {
662                id: "s1".to_string(),
663                file_id,
664                name: "<script>&\"test\"</script>".to_string(),
665                kind: "function".to_string(),
666                byte_offset: 0,
667                byte_length: 11,
668            })
669            .expect("symbol insert should succeed");
670
671        let dir = tempfile::TempDir::new().expect("failed to create temp dir");
672        let file_path = make_temp_file(&dir, "bad.rs", "fn bad() {}\n");
673
674        let packer = Packer::with_workspace_root(
675            cache,
676            Parser {},
677            dir.path().to_path_buf(),
678            OutputFormat::Xml,
679            None,
680            false, // no_file_summary
681            false, // no_files
682            false,
683            false,
684            false,
685        );
686        let output = packer.pack(&[file_path]).expect("pack should succeed");
687
688        // The raw unescaped characters must NOT appear outside of CDATA in XML attributes/tags.
689        // Correct output would use &lt; &gt; &amp; &quot; instead.
690        assert!(
691            !output.contains("<script>"),
692            "Bare <script> tag should not appear in XML output; expected escaped form"
693        );
694        assert!(
695            output.contains("&lt;script&gt;") || output.contains("&amp;"),
696            "XML special characters in symbol names must be escaped"
697        );
698    }
699
700    /// File paths with XML special characters should be escaped in path attributes.
701    /// This test describes CORRECT behavior and is expected to FAIL until fixed.
702    #[test]
703    fn test_xml_path_attribute_special_chars_are_escaped() {
704        let dir = tempfile::TempDir::new().expect("failed to create temp dir");
705        // Use a filename that contains an ampersand (legal on most filesystems).
706        let file_path = make_temp_file(&dir, "a&b.txt", "hello world\n");
707
708        let packer = Packer::new(
709            SqliteCache::new_in_memory().expect("failed to create test cache"),
710            Parser {},
711            OutputFormat::Xml,
712            None,
713            false,
714            false,
715            false,
716            false,
717            false,
718        );
719        let output = packer.pack(&[file_path]).expect("pack should succeed");
720
721        // The bare & must be escaped as &amp; in XML attributes.
722        assert!(
723            !output.contains("path=\"") || !output.contains("a&b.txt\""),
724            "Bare & in path attribute must be escaped as &amp;"
725        );
726    }
727
728    /// File content containing `]]>` inside a CDATA section must be escaped so
729    /// the XML document stays well-formed.
730    #[test]
731    fn test_xml_cdata_cdata_end_sequence_is_escaped() {
732        let dir = tempfile::TempDir::new().expect("failed to create temp dir");
733        // Content that would prematurely close a CDATA section.
734        let tricky = "let s = \"]]>\";\n";
735        let file_path = make_temp_file(&dir, "tricky.txt", tricky);
736
737        let packer = Packer::new(
738            SqliteCache::new_in_memory().expect("failed to create test cache"),
739            Parser {},
740            OutputFormat::Xml,
741            None,
742            false,
743            false,
744            false,
745            false,
746            false,
747        );
748        let output = packer.pack(&[file_path]).expect("pack should succeed");
749
750        // The raw ]]> sequence must not appear verbatim inside a CDATA section.
751        // The implementation splits it as ]]]]><![CDATA[>.
752        // After the transformation there should be no bare ]]> that closes CDATA prematurely.
753        // A simple check: every ]]> in the output must be followed immediately by </content>
754        // (i.e., it is the legitimate CDATA close).
755        let positions: Vec<_> = output.match_indices("]]>").collect();
756        for (idx, _) in &positions {
757            let after = &output[idx + 3..];
758            assert!(
759                after.starts_with("</content>"),
760                "Found ]]> at position {} that is not the CDATA closing sequence; \
761                 raw content may break XML well-formedness",
762                idx
763            );
764        }
765    }
766
/// Basic well-formedness check for XML output: the document must open with
/// `<repository>`, close with `</repository>`, and contain no bare `<`
/// outside of CDATA sections.
///
/// Only `<` is inspected; a stray `>` outside CDATA is not detected by this test.
#[test]
fn test_xml_output_basic_well_formedness() {
    let (_dir, file_path) = make_temp_rs_file("fn main() {}\n");

    let packer = Packer::new(
        SqliteCache::new_in_memory().expect("failed to create test cache"),
        Parser {},
        OutputFormat::Xml,
        None,
        false,
        false,
        false,
        false,
        false,
    );
    let output = packer.pack(&[file_path]).expect("pack should succeed");

    assert!(
        output.starts_with("<repository>"),
        "XML output must start with <repository>"
    );
    assert!(
        output.trim_end().ends_with("</repository>"),
        "XML output must end with </repository>"
    );

    // Strip all CDATA sections before checking for bare angle brackets.
    let cdata_re =
        regex::Regex::new(r"(?s)<!\[CDATA\[.*?]]>").expect("failed to compile cdata regex");
    let stripped = cdata_re.replace_all(&output, "");

    // Any remaining `<` must be the start of a tag (followed by [/a-zA-Z!?]).
    for (i, _) in stripped.match_indices('<') {
        // `<` is a single byte, so `i + 1` is always a valid char boundary.
        let next = stripped[i + 1..].chars().next();
        if matches!(next, Some('/' | '!' | '?' | 'a'..='z' | 'A'..='Z')) {
            continue;
        }

        // Build the context window on char boundaries. Slicing at raw byte
        // offsets could panic inside a multi-byte character and replace the
        // real failure message with an unrelated slicing panic.
        let mut start = i.saturating_sub(10);
        while !stripped.is_char_boundary(start) {
            start -= 1;
        }
        let mut end = (i + 20).min(stripped.len());
        while !stripped.is_char_boundary(end) {
            end += 1;
        }
        panic!(
            "Bare < found at position {} outside of CDATA: ...{}...",
            i,
            &stripped[start..end]
        );
    }
}
813
814    // =========================================================================
815    // Markdown output correctness
816    // =========================================================================
817
/// In the Markdown skeleton map, each symbol entry must sit beneath its file
/// bullet with a two-space indent (`  - <kind> <name>`).
#[test]
fn test_markdown_skeleton_map_indentation() {
    use crate::cache::CacheStore;

    let store = SqliteCache::new_in_memory().expect("failed to create test cache");
    store.init().expect("failed to init cache schema");

    let tmp = tempfile::TempDir::new().expect("failed to create temp dir");
    let source_path = make_temp_file(&tmp, "lib.rs", "fn alpha() {}\n");

    // Seed the cache with one file row and one symbol belonging to it.
    let file_id = store
        .upsert_file("lib.rs", "h2", b"fn alpha() {}")
        .expect("upsert_file should succeed");
    let alpha = crate::cache::Symbol {
        id: String::from("s_alpha"),
        file_id,
        name: String::from("alpha"),
        kind: String::from("function"),
        byte_offset: 0,
        byte_length: 13,
    };
    store
        .insert_symbol(&alpha)
        .expect("symbol insert should succeed");

    let workspace_root = tmp.path().to_path_buf();
    let packer = Packer::with_workspace_root(
        store,
        Parser {},
        workspace_root,
        OutputFormat::Markdown,
        None,
        false,
        true, // no_files: emit the skeleton map only
        false,
        false,
        false,
    );
    let rendered = packer.pack(&[source_path]).expect("pack should succeed");

    // A bullet marker must exist somewhere in the rendered map.
    let has_bullet = rendered.contains("- ");
    assert!(has_bullet, "File bullet not found in Markdown output");

    // The symbol line carries the two-space indent.
    let has_indented_symbol = rendered.contains("  - function alpha");
    assert!(
        has_indented_symbol,
        "Symbol entries in skeleton map must be indented with two spaces; got:\n{}",
        rendered
    );
}
871
/// A symbol name made of Markdown metacharacters (`*`, `_`, `[`, `]`, and a
/// backtick) must be emitted verbatim, and a bullet marker must still be
/// present in the skeleton map.
#[test]
fn test_markdown_symbol_names_with_special_chars() {
    use crate::cache::CacheStore;

    let store = SqliteCache::new_in_memory().expect("failed to create test cache");
    store.init().expect("failed to init cache schema");

    let tmp = tempfile::TempDir::new().expect("failed to create temp dir");
    let source_path = make_temp_file(&tmp, "weird.rs", "fn weird() {}\n");

    let file_id = store
        .upsert_file("weird.rs", "h3", b"fn weird() {}")
        .expect("upsert_file should succeed");

    // The cached symbol's name consists of Markdown special characters.
    let weird = crate::cache::Symbol {
        id: String::from("s_weird"),
        file_id,
        name: String::from("*_[weird`_]*"),
        kind: String::from("function"),
        byte_offset: 0,
        byte_length: 13,
    };
    store
        .insert_symbol(&weird)
        .expect("symbol insert should succeed");

    let workspace_root = tmp.path().to_path_buf();
    let packer = Packer::with_workspace_root(
        store,
        Parser {},
        workspace_root,
        OutputFormat::Markdown,
        None,
        false,
        true, // no_files
        false,
        false,
        false,
    );
    let rendered = packer.pack(&[source_path]).expect("pack should succeed");

    // Structure check: a bullet marker is still there.
    let has_bullet = rendered.contains("- ");
    assert!(has_bullet, "File bullet disappeared");

    // Content check: the odd name comes through unchanged.
    let has_verbatim_name = rendered.contains("*_[weird`_]*");
    assert!(
        has_verbatim_name,
        "Symbol name with Markdown special chars should appear verbatim"
    );
}
922
/// Two files sharing the basename `lib.rs` (under `src/` and `tests/`) must
/// each be listed with their own symbol in the Markdown skeleton map, rather
/// than having symbols attributed by basename alone.
#[test]
fn test_markdown_skeleton_map_uses_exact_relative_path_for_duplicate_basenames() {
    use crate::cache::CacheStore;

    let store = SqliteCache::new_in_memory().expect("failed to create test cache");
    store.init().expect("failed to init cache schema");

    let tmp = tempfile::TempDir::new().expect("failed to create temp dir");
    let root = tmp.path();
    std::fs::create_dir_all(root.join("src")).expect("create src dir");
    std::fs::create_dir_all(root.join("tests")).expect("create tests dir");

    let src_path = make_temp_file(&tmp, "src/lib.rs", "fn alpha() {}\n");
    let tests_path = make_temp_file(&tmp, "tests/lib.rs", "fn beta() {}\n");

    // Cache entry for src/lib.rs with its single symbol `alpha`.
    let src_file_id = store
        .upsert_file("src/lib.rs", "h-src", b"fn alpha() {}")
        .expect("upsert_file should succeed");
    let alpha = crate::cache::Symbol {
        id: String::from("src_alpha"),
        file_id: src_file_id,
        name: String::from("alpha"),
        kind: String::from("function"),
        byte_offset: 0,
        byte_length: 13,
    };
    store.insert_symbol(&alpha).expect("insert alpha symbol");

    // Cache entry for tests/lib.rs with its single symbol `beta`.
    let tests_file_id = store
        .upsert_file("tests/lib.rs", "h-tests", b"fn beta() {}")
        .expect("upsert_file should succeed");
    let beta = crate::cache::Symbol {
        id: String::from("tests_beta"),
        file_id: tests_file_id,
        name: String::from("beta"),
        kind: String::from("function"),
        byte_offset: 0,
        byte_length: 12,
    };
    store.insert_symbol(&beta).expect("insert beta symbol");

    // Render the expected fragments up front so the paths can be moved into `pack`.
    let expected_src = format!("- {}\n  - function alpha", src_path.display());
    let expected_tests = format!("- {}\n  - function beta", tests_path.display());

    let packer = Packer::with_workspace_root(
        store,
        Parser {},
        root.to_path_buf(),
        OutputFormat::Markdown,
        None,
        false,
        true,
        false,
        false,
        false,
    );
    let output = packer
        .pack(&[src_path, tests_path])
        .expect("pack should succeed");

    let src_listed = output.contains(&expected_src);
    assert!(
        src_listed,
        "src/lib.rs should retain its own symbols; got:\n{output}"
    );
    let tests_listed = output.contains(&expected_tests);
    assert!(
        tests_listed,
        "tests/lib.rs should retain its own symbols; got:\n{output}"
    );
}
992
/// A plugin whose `detect()` looks for `manifest.json` in the directory it is
/// given must still enrich a file nested under `src/` when the marker lives
/// at the workspace root.
#[test]
fn test_plugin_detection_uses_workspace_root_for_nested_files() {
    /// Test plugin: active only where `manifest.json` exists, and tags every
    /// bone with `root_detected = true`.
    struct RootMarkerPlugin;

    impl ContextPlugin for RootMarkerPlugin {
        fn name(&self) -> &str {
            "root-marker"
        }

        fn detect(&self, workspace_root: &Path) -> bool {
            let marker = workspace_root.join("manifest.json");
            marker.exists()
        }

        fn enrich(&self, _file_path: &Path, base_bones: &mut Vec<Bone>) -> Result<()> {
            base_bones.iter_mut().for_each(|bone| {
                bone.metadata
                    .insert("root_detected".to_string(), "true".to_string());
            });
            Ok(())
        }
    }

    let tmp = tempfile::TempDir::new().expect("failed to create temp dir");
    let workspace_root = tmp.path().to_path_buf();
    std::fs::write(workspace_root.join("manifest.json"), "{}").expect("write root marker");
    let nested = make_temp_file(&tmp, "src/lib.rs", "fn nested() {}\n");

    let cache = SqliteCache::new_in_memory().expect("failed to create test cache");
    let mut packer = Packer::with_workspace_root(
        cache,
        Parser {},
        workspace_root,
        OutputFormat::Xml,
        None,
        false,
        false,
        false,
        false,
        false,
    );
    packer.register_plugin(Box::new(RootMarkerPlugin));

    let rendered = packer.pack(&[nested]).expect("pack should succeed");
    let enriched = rendered.contains("root_detected");
    assert!(
        enriched,
        "plugin detect() should run against workspace root and enrich nested files"
    );
}
1039
1040    // =========================================================================
1041    // Token governor
1042    // =========================================================================
1043
/// When the token budget is far larger than the input, the XML output must
/// contain a `<content>` CDATA block.
#[test]
fn test_token_governor_generous_budget_includes_content() {
    let (_tmp, source_path) = make_temp_rs_file("fn main() { let x = 42; }\n");

    let cache = SqliteCache::new_in_memory().expect("failed to create test cache");
    let generous_budget = Some(100_000); // very large budget
    let packer = Packer::new(
        cache,
        Parser {},
        OutputFormat::Xml,
        generous_budget,
        false,
        false,
        false,
        false,
        false,
    );
    let rendered = packer.pack(&[source_path]).expect("pack should succeed");

    // The opening of a content block must be present.
    let has_content = rendered.contains("<content><![CDATA[");
    assert!(
        has_content,
        "Expected <content> block when budget is generous; got:\n{}",
        rendered
    );
}
1069
/// A one-token budget must make `pack()` succeed while emitting no
/// `<content>` block at all.
#[test]
fn test_token_governor_one_token_budget_omits_content() {
    let (_tmp, source_path) = make_temp_rs_file("fn main() { let x = 42; }\n");

    let cache = SqliteCache::new_in_memory().expect("failed to create test cache");
    let tiny_budget = Some(1); // impossibly tight budget
    let packer = Packer::new(
        cache,
        Parser {},
        OutputFormat::Xml,
        tiny_budget,
        false,
        false,
        false,
        false,
        false,
    );

    // Exhausting the budget is not an error condition.
    let packed = packer.pack(&[source_path]);
    assert!(packed.is_ok(), "pack() must not error under tight budget");
    let rendered = packed.expect("pack should succeed");

    // File content must have been left out.
    let has_content = rendered.contains("<content>");
    assert!(
        !has_content,
        "No <content> block expected when budget is 1 token"
    );
}
1098
/// Running out of tokens must degrade gracefully: for each of several small
/// budgets (including zero), `pack()` returns `Ok` instead of panicking or
/// failing.
#[test]
fn test_token_governor_graceful_degradation_no_panic() {
    const BUDGETS: [usize; 4] = [0, 1, 5, 50];

    let (_tmp, source_path) =
        make_temp_rs_file("fn a() { 1 }\nfn b() { 2 }\nfn c() { 3 }\nfn d() { 4 }\n");

    for budget in BUDGETS {
        // A fresh cache and packer per budget keeps the runs independent.
        let cache = SqliteCache::new_in_memory().expect("failed to create test cache");
        let packer = Packer::new(
            cache,
            Parser {},
            OutputFormat::Xml,
            Some(budget),
            false,
            false,
            false,
            false,
            false,
        );

        let packed = packer.pack(std::slice::from_ref(&source_path));
        assert!(
            packed.is_ok(),
            "pack() panicked or errored at max_tokens={}",
            budget
        );
    }
}
1125
1126    // =========================================================================
1127    // Flag combinations
1128    // =========================================================================
1129
/// With both `no_file_summary` and `no_files` enabled, the trimmed XML output
/// must be exactly the opening and closing `repository` tags.
#[test]
fn test_no_files_and_no_file_summary_together() {
    let (_tmp, source_path) = make_temp_rs_file("fn main() {}\n");

    let cache = SqliteCache::new_in_memory().expect("failed to create test cache");
    let packer = Packer::new(
        cache,
        Parser {},
        OutputFormat::Xml,
        None,
        true, // no_file_summary
        true, // no_files
        false,
        false,
        false,
    );
    let rendered = packer.pack(&[source_path]).expect("pack should succeed");

    // Nothing but the wrapper element may remain.
    let bare = rendered.trim();
    assert_eq!(
        bare, "<repository>\n</repository>",
        "With both no_files and no_file_summary, output should be just the repository tags; got:\n{}",
        bare
    );
}
1157
/// With `remove_comments` enabled, a trailing `//` line comment is dropped
/// while the code before it on the same line is kept.
#[test]
fn test_remove_line_comments_from_rust() {
    let tmp = tempfile::TempDir::new().expect("failed to create temp dir");

    // A .txt extension is used so the parser falls back to raw content and
    // body elision does not interfere with the check.
    let commented_source = "let x = 1; // this is a comment\nlet y = 2;\n";
    let source_path = make_temp_file(&tmp, "comments.txt", commented_source);

    let cache = SqliteCache::new_in_memory().expect("failed to create test cache");
    let packer = Packer::new(
        cache,
        Parser {},
        OutputFormat::Xml,
        None,
        false,
        false,
        true, // remove_comments
        false,
        false,
    );
    let rendered = packer.pack(&[source_path]).expect("pack should succeed");

    let comment_survived = rendered.contains("// this is a comment");
    assert!(
        !comment_survived,
        "Line comment should be stripped; got:\n{}",
        rendered
    );

    let code_survived = rendered.contains("let x = 1;");
    assert!(
        code_survived,
        "Non-comment code should remain after stripping line comments"
    );
}
1192
/// With `remove_comments` enabled, `/* ... */` block comments are removed,
/// both the inline form and the multi-line form, while surrounding code stays.
#[test]
fn test_remove_block_comments() {
    let tmp = tempfile::TempDir::new().expect("failed to create temp dir");
    let commented_source =
        "int x = /* inline block */ 42;\n/* multi\nline\ncomment */\nint y = 1;\n";
    let source_path = make_temp_file(&tmp, "block_comments.txt", commented_source);

    let cache = SqliteCache::new_in_memory().expect("failed to create test cache");
    let packer = Packer::new(
        cache,
        Parser {},
        OutputFormat::Xml,
        None,
        false,
        false,
        true, // remove_comments
        false,
        false,
    );
    let rendered = packer.pack(&[source_path]).expect("pack should succeed");

    let inline_survived = rendered.contains("inline block");
    assert!(!inline_survived, "Inline block comment should be stripped");

    let multiline_survived = rendered.contains("multi\nline\ncomment");
    assert!(
        !multiline_survived,
        "Multi-line block comment should be stripped"
    );

    let code_survived = rendered.contains("int x =");
    assert!(
        code_survived,
        "Code outside block comment should be preserved"
    );
}
1229
/// With `remove_empty_lines` enabled, runs of consecutive blank lines are
/// collapsed so that no three newlines appear in a row, and the non-blank
/// lines are kept.
#[test]
fn test_remove_empty_lines_collapses_blanks() {
    let tmp = tempfile::TempDir::new().expect("failed to create temp dir");
    let gappy_source = "line one\n\n\n\nline two\n\n\nline three\n";
    let source_path = make_temp_file(&tmp, "blanks.txt", gappy_source);

    let cache = SqliteCache::new_in_memory().expect("failed to create test cache");
    let packer = Packer::new(
        cache,
        Parser {},
        OutputFormat::Xml,
        None,
        false,
        false,
        false,
        true, // remove_empty_lines
        false,
    );
    let rendered = packer.pack(&[source_path]).expect("pack should succeed");

    // Three newlines in a row would mean at least two adjacent blank lines.
    let has_blank_run = rendered.contains("\n\n\n");
    assert!(
        !has_blank_run,
        "Multiple consecutive blank lines should be collapsed to a single newline; got:\n{}",
        rendered
    );

    for kept_line in ["line one", "line two"] {
        assert!(
            rendered.contains(kept_line),
            "Non-blank lines must be preserved"
        );
    }
}
1269
/// With `truncate_base64` enabled, a run of exactly 100 alphanumeric
/// characters is replaced by the `[TRUNCATED_BASE64]` placeholder.
#[test]
fn test_truncate_base64_replaces_long_strings() {
    let tmp = tempfile::TempDir::new().expect("failed to create temp dir");

    // 100 characters is the boundary length that should already be truncated.
    let long_token = "A".repeat(100);
    let file_body = format!("key = {long_token}\n");
    let source_path = make_temp_file(&tmp, "tokens.txt", &file_body);

    let cache = SqliteCache::new_in_memory().expect("failed to create test cache");
    let packer = Packer::new(
        cache,
        Parser {},
        OutputFormat::Xml,
        None,
        false,
        false,
        false,
        false,
        true, // truncate_base64
    );
    let rendered = packer.pack(&[source_path]).expect("pack should succeed");

    let has_placeholder = rendered.contains("[TRUNCATED_BASE64]");
    assert!(
        has_placeholder,
        "A 100-char alphanumeric string should be replaced with [TRUNCATED_BASE64]"
    );

    let token_survived = rendered.contains(&long_token);
    assert!(
        !token_survived,
        "The original long token must not appear in output after truncation"
    );
}
1302
/// With `truncate_base64` enabled, a 99-character alphanumeric run (one
/// short of the threshold) must be left untouched.
#[test]
fn test_truncate_base64_preserves_short_strings() {
    let tmp = tempfile::TempDir::new().expect("failed to create temp dir");

    // One character below the truncation threshold.
    let short_token = "B".repeat(99);
    let file_body = format!("key = {short_token}\n");
    let source_path = make_temp_file(&tmp, "short_tokens.txt", &file_body);

    let cache = SqliteCache::new_in_memory().expect("failed to create test cache");
    let packer = Packer::new(
        cache,
        Parser {},
        OutputFormat::Xml,
        None,
        false,
        false,
        false,
        false,
        true, // truncate_base64
    );
    let rendered = packer.pack(&[source_path]).expect("pack should succeed");

    let token_survived = rendered.contains(&short_token);
    assert!(token_survived, "A 99-char string must NOT be truncated");

    let has_placeholder = rendered.contains("[TRUNCATED_BASE64]");
    assert!(
        !has_placeholder,
        "No truncation should occur for strings under 100 chars"
    );
}
1334
1335    // =========================================================================
1336    // Multiple files
1337    // =========================================================================
1338
/// Packing three files must mention each of the three file names in the output.
#[test]
fn test_three_files_all_appear_in_skeleton_map() {
    let tmp = tempfile::TempDir::new().expect("failed to create temp dir");
    let first = make_temp_file(&tmp, "one.txt", "content one\n");
    let second = make_temp_file(&tmp, "two.txt", "content two\n");
    let third = make_temp_file(&tmp, "three.txt", "content three\n");

    let cache = SqliteCache::new_in_memory().expect("failed to create test cache");
    let packer = Packer::new(
        cache,
        Parser {},
        OutputFormat::Xml,
        None,
        false,
        false,
        false,
        false,
        false,
    );
    let rendered = packer
        .pack(&[first, second, third])
        .expect("pack should succeed");

    // Each entry pairs a file name with the failure message used if it is absent.
    let expectations = [
        ("one.txt", "one.txt missing from output"),
        ("two.txt", "two.txt missing from output"),
        ("three.txt", "three.txt missing from output"),
    ];
    for (file_name, failure) in expectations {
        assert!(rendered.contains(file_name), "{}", failure);
    }
}
1367
/// The first occurrence of each file name in the output must follow the order
/// in which the files were passed to `pack()`, i.e. the ordering is
/// deterministic.
#[test]
fn test_skeleton_map_preserves_input_order() {
    let tmp = tempfile::TempDir::new().expect("failed to create temp dir");
    let first = make_temp_file(&tmp, "alpha.txt", "alpha\n");
    let second = make_temp_file(&tmp, "beta.txt", "beta\n");
    let third = make_temp_file(&tmp, "gamma.txt", "gamma\n");

    let cache = SqliteCache::new_in_memory().expect("failed to create test cache");
    let packer = Packer::new(
        cache,
        Parser {},
        OutputFormat::Xml,
        None,
        false,
        false,
        false,
        false,
        false,
    );
    let rendered = packer
        .pack(&[first, second, third])
        .expect("pack should succeed");

    // Byte offset of the first mention of a file name; panics with `missing` if absent.
    let first_mention = |file_name: &str, missing: &str| rendered.find(file_name).expect(missing);

    let alpha_at = first_mention("alpha.txt", "alpha.txt not found");
    let beta_at = first_mention("beta.txt", "beta.txt not found");
    let gamma_at = first_mention("gamma.txt", "gamma.txt not found");

    let in_input_order = alpha_at < beta_at && beta_at < gamma_at;
    assert!(
        in_input_order,
        "Files must appear in the skeleton map in the order they were supplied"
    );
}
1399
1400    // =========================================================================
1401    // Binary / missing files
1402    // =========================================================================
1403
/// A path whose file no longer exists when `pack()` runs must be skipped
/// gracefully: no panic, no `Err`, an intact `<repository>` wrapper, and none
/// of the missing file's content in the output.
///
/// The file is created and then removed *before* `pack()` is called, so this
/// covers the "path handed in but file missing" case rather than a deletion
/// racing with the read.
#[test]
fn test_deleted_file_is_gracefully_skipped() {
    let dir = tempfile::TempDir::new().expect("failed to create temp dir");
    let file_path = make_temp_file(&dir, "ephemeral.txt", "will be deleted\n");

    // Delete the file before calling pack().
    std::fs::remove_file(&file_path).expect("failed to delete ephemeral file");

    let packer = Packer::new(
        SqliteCache::new_in_memory().expect("failed to create test cache"),
        Parser {},
        OutputFormat::Xml,
        None,
        false,
        false,
        false,
        false,
        false,
    );
    let result = packer.pack(&[file_path]);

    assert!(
        result.is_ok(),
        "pack() must not return Err when a file has been deleted; got: {:?}",
        result.err()
    );

    let output = result.expect("pack should succeed even when file is deleted");

    // The wrapper element must still open and close the document. The check
    // is `starts_with` so that it enforces what the failure message states.
    assert!(
        output.starts_with("<repository>"),
        "Output must start with <repository>"
    );
    assert!(
        output.trim_end().ends_with("</repository>"),
        "Output must end with </repository>"
    );

    // No content should be emitted for the missing file.
    assert!(
        !output.contains("will be deleted"),
        "Content of deleted file must not appear in output"
    );
}
1450
1451    // =========================================================================
1452    // Metadata XML injection (Amber team gap #1)
1453    // =========================================================================
1454
/// Plugin-supplied metadata keys and values containing XML-significant
/// characters must be escaped before they are written into the output.
///
/// The plugin injects a value that tries to close `<metadata>` and smuggle in
/// a `<malicious>` sibling element. The test passes only if that markup never
/// appears raw, at least one escaped entity is present, and the closing
/// `</repository>` tag survives.
///
/// NOTE(review): this test specifies the desired behavior; whether the
/// packer currently escapes metadata cannot be determined from this test
/// alone, so confirm against the implementation if it fails.
#[test]
fn test_plugin_metadata_xml_escaping() {
    /// Test plugin: always active, and attaches hostile metadata to every bone.
    struct XmlDangerousPlugin;

    impl ContextPlugin for XmlDangerousPlugin {
        fn name(&self) -> &str {
            "xml_dangerous"
        }

        fn detect(&self, _directory: &Path) -> bool {
            true
        }

        fn enrich(&self, _file_path: &Path, base_bones: &mut Vec<Bone>) -> Result<()> {
            // Key built from XML-dangerous characters.
            const HOSTILE_KEY: &str = "key<with>&\"special";
            // Value that attempts to inject a sibling element.
            const HOSTILE_VALUE: &str =
                "</metadata><malicious>payload</malicious><metadata key=\"x\">";

            base_bones.iter_mut().for_each(|bone| {
                bone.metadata
                    .insert(HOSTILE_KEY.to_string(), HOSTILE_VALUE.to_string());
            });
            Ok(())
        }
    }

    let (_tmp, source_path) = make_temp_rs_file("fn main() {}\n");
    let cache = SqliteCache::new_in_memory().expect("failed to create test cache");
    let mut packer = Packer::new(
        cache,
        Parser {},
        OutputFormat::Xml,
        None,
        false,
        false,
        false,
        false,
        false,
    );
    packer.register_plugin(Box::new(XmlDangerousPlugin));

    let rendered = packer.pack(&[source_path]).expect("pack should succeed");

    // Neither the opening nor the closing injected tag may appear raw.
    let raw_open_tag = rendered.contains("<malicious>");
    assert!(
        !raw_open_tag,
        "Bare <malicious> tag found in output — metadata value was not XML-escaped; got:\n{}",
        rendered
    );
    let raw_close_tag = rendered.contains("</malicious>");
    assert!(
        !raw_close_tag,
        "Bare </malicious> tag found in output — metadata value was not XML-escaped; got:\n{}",
        rendered
    );

    // The payload contains `<`, `>` and `&`, so at least one entity must show up.
    let has_entity = ["&lt;", "&gt;", "&amp;"]
        .iter()
        .any(|entity| rendered.contains(*entity));
    assert!(
        has_entity,
        "Expected XML-escaped entities (&lt;, &gt;, or &amp;) in metadata output; got:\n{}",
        rendered
    );

    // The document's closing tag must still be there.
    let has_closing_tag = rendered.contains("</repository>");
    assert!(
        has_closing_tag,
        "Output must still contain </repository> after metadata injection; got:\n{}",
        rendered
    );
}
1530}