Skip to main content

argyph_pack/render/
xml.rs

1use camino::Utf8PathBuf;
2
3/// Render a set of packed files in the primary XML format.
4///
5/// Each file is wrapped in a `<file>` element with `path`, `tokens`, and
6/// `truncated` attributes. Content is wrapped in `<![CDATA[...]]>` blocks for
7/// readability; files containing `]]>` are split across multiple CDATA
8/// sections.
9///
10/// Output is guaranteed to be well-formed XML.
11pub fn render_xml(files: &[(Utf8PathBuf, &str, bool, usize)], repo_name: &str) -> String {
12    let mut out = String::new();
13    out.push_str("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
14    out.push_str(&format!(
15        "<repository name=\"{}\">\n",
16        xml_escape_attr(repo_name)
17    ));
18
19    for (path, content, truncated, token_count) in files {
20        let path_str = xml_escape_attr(path.as_str());
21        out.push_str(&format!(
22            "  <file path=\"{path_str}\" tokens=\"{token_count}\" truncated=\"{truncated}\">\n"
23        ));
24        out.push_str(&cdata_wrap(content));
25        out.push_str("\n  </file>\n");
26    }
27
28    out.push_str("</repository>\n");
29    out
30}
31
/// Wrap `content` in one or more `<![CDATA[...]]>` sections.
///
/// A CDATA section ends at the first `]]>`, so that sequence cannot appear
/// inside one. Every occurrence in `content` is therefore split: the current
/// section is closed right after `]]` and a new section is opened right
/// before `>`. Content without `]]>` is emitted as a single section.
fn cdata_wrap(content: &str) -> String {
    const OPEN: &str = "<![CDATA[";
    const CLOSE: &str = "]]>";
    // `]]` + close of the current section, then a fresh section starting at `>`.
    const SPLIT: &str = "]]]]><![CDATA[>";

    let mut out = String::with_capacity(OPEN.len() + content.len() + CLOSE.len());
    out.push_str(OPEN);

    // `split` always yields at least one piece (the whole string when there
    // is no match), so a single pass covers both the plain and split cases.
    for (index, piece) in content.split(CLOSE).enumerate() {
        if index > 0 {
            out.push_str(SPLIT);
        }
        out.push_str(piece);
    }

    out.push_str(CLOSE);
    out
}
42
/// Escape a string for use inside a double-quoted XML attribute value.
///
/// `&`, `<`, `>` and `"` are replaced with their predefined entities. Tab,
/// line feed and carriage return are written as numeric character references
/// (`&#x9;`, `&#xA;`, `&#xD;`): an XML parser normalizes literal whitespace
/// characters in attribute values to plain spaces, which would silently
/// change a value such as a path containing a newline when it is read back.
/// Every other character is copied through unchanged.
fn xml_escape_attr(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '"' => out.push_str("&quot;"),
            // Character references survive attribute-value normalization.
            '\t' => out.push_str("&#x9;"),
            '\n' => out.push_str("&#xA;"),
            '\r' => out.push_str("&#xD;"),
            _ => out.push(ch),
        }
    }
    out
}
57
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use camino::Utf8PathBuf;

    /// Shorthand for building a `Utf8PathBuf` from a string literal.
    fn p(s: &str) -> Utf8PathBuf {
        Utf8PathBuf::from(s)
    }

    #[test]
    fn empty_file_list_produces_valid_xml() {
        let result = render_xml(&[], "test-repo");
        assert!(result.starts_with("<?xml"));
        assert!(result.contains("<repository"));
        assert!(result.contains("</repository>"));
    }

    #[test]
    fn single_file_produces_file_element() {
        let files = [(p("src/main.rs"), "fn main() {}", false, 5)];
        let result = render_xml(&files, "my-repo");
        assert!(result.contains("<file path=\"src/main.rs\""));
        assert!(result.contains("tokens=\"5\""));
        assert!(result.contains("truncated=\"false\""));
        assert!(result.contains("<![CDATA[fn main() {}]]>"));
    }

    #[test]
    fn truncated_file_shows_true() {
        let files = [(p("src/lib.rs"), "pub fn foo() {}", true, 3)];
        let result = render_xml(&files, "repo");
        assert!(result.contains("truncated=\"true\""));
    }

    #[test]
    fn cdata_splits_on_close_sequence() {
        let content = "some text ]]> more text";
        let wrapped = cdata_wrap(content);

        // Pin the exact output: a check that merely counts `]]>` occurrences
        // would also accept a wrapper that does not split at all.
        assert_eq!(
            wrapped,
            "<![CDATA[some text ]]]]><![CDATA[> more text]]>"
        );

        // Concatenating the payload of every section must give back the
        // original content, i.e. the split neither loses nor adds text.
        let recovered: String = wrapped
            .split("<![CDATA[")
            .map(|section| section.strip_suffix("]]>").unwrap_or(section))
            .collect();
        assert_eq!(recovered, content);
    }

    #[test]
    fn no_cdata_split_when_no_close_sequence() {
        let content = "plain text without special chars";
        let wrapped = cdata_wrap(content);
        assert_eq!(wrapped, format!("<![CDATA[{content}]]>"));
    }

    #[test]
    fn xml_escape_special_chars() {
        let result = xml_escape_attr("a&b<c>d\"e");
        assert_eq!(result, "a&amp;b&lt;c&gt;d&quot;e");
    }

    #[test]
    fn xml_escape_plain_string_unchanged() {
        let result = xml_escape_attr("hello_world");
        assert_eq!(result, "hello_world");
    }

    #[test]
    fn repo_name_escaped_in_attribute() {
        let files = [(p("x.rs"), "", false, 0)];
        let result = render_xml(&files, "repo & stuff");
        assert!(result.contains("name=\"repo &amp; stuff\""));
    }
}