Skip to main content

infiniloom_engine/document/
output.rs

1//! Document-specific output formatters for LLM consumption.
2//!
3//! Formats a Document into XML (Claude), Markdown (GPT), or JSON (agents).
4
5use crate::document::types::*;
6use crate::output::escaping;
7
8/// Format a document as Claude-optimized XML.
9pub fn format_xml(doc: &Document) -> String {
10    let mut out = String::with_capacity(doc.block_count() * 200);
11    out.push_str("<document>\n");
12
13    // Metadata
14    format_xml_metadata(&mut out, doc);
15
16    // Table of contents (compact)
17    if doc.section_count() > 2 {
18        out.push_str("  <table_of_contents>\n");
19        write_toc_xml(&mut out, &doc.sections, 2);
20        out.push_str("  </table_of_contents>\n\n");
21    }
22
23    // Sections
24    for section in &doc.sections {
25        write_section_xml(&mut out, section, 1);
26    }
27
28    out.push_str("</document>\n");
29    out
30}
31
32fn format_xml_metadata(out: &mut String, doc: &Document) {
33    out.push_str("  <metadata>\n");
34    if let Some(title) = &doc.title {
35        out.push_str(&format!("    <title>{}</title>\n", escaping::escape_xml_text(title)));
36    }
37    let m = &doc.metadata;
38    if let Some(v) = &m.author {
39        out.push_str(&format!("    <author>{}</author>\n", escaping::escape_xml_text(v)));
40    }
41    if let Some(v) = &m.version {
42        out.push_str(&format!("    <version>{}</version>\n", escaping::escape_xml_text(v)));
43    }
44    if let Some(v) = &m.effective_date {
45        out.push_str(&format!(
46            "    <effective_date>{}</effective_date>\n",
47            escaping::escape_xml_text(v)
48        ));
49    }
50    if let Some(v) = &m.classification {
51        out.push_str(&format!(
52            "    <classification>{}</classification>\n",
53            escaping::escape_xml_text(v)
54        ));
55    }
56    out.push_str(&format!("    <format>{}</format>\n", doc.format.name()));
57    out.push_str("  </metadata>\n\n");
58}
59
60fn write_toc_xml(out: &mut String, sections: &[Section], depth: usize) {
61    let indent = " ".repeat(depth * 2);
62    for section in sections {
63        if let Some(title) = &section.title {
64            let id_attr = section
65                .id
66                .as_ref()
67                .map(|id| format!(" id=\"{}\"", escaping::escape_xml_attribute(id)))
68                .unwrap_or_default();
69            let num_attr = section
70                .number
71                .as_ref()
72                .map(|n| format!(" number=\"{}\"", escaping::escape_xml_attribute(n)))
73                .unwrap_or_default();
74            out.push_str(&format!(
75                "{indent}<entry level=\"{}\"{id_attr}{num_attr}>{}</entry>\n",
76                section.level,
77                escaping::escape_xml_text(title)
78            ));
79        }
80        write_toc_xml(out, &section.children, depth + 1);
81    }
82}
83
84fn write_section_xml(out: &mut String, section: &Section, depth: usize) {
85    let indent = " ".repeat(depth * 2);
86
87    let mut attrs = format!("level=\"{}\"", section.level);
88    if let Some(id) = &section.id {
89        attrs.push_str(&format!(" id=\"{}\"", escaping::escape_xml_attribute(id)));
90    }
91    if let Some(num) = &section.number {
92        attrs.push_str(&format!(" number=\"{}\"", escaping::escape_xml_attribute(num)));
93    }
94    if let Some(title) = &section.title {
95        attrs.push_str(&format!(" title=\"{}\"", escaping::escape_xml_attribute(title)));
96    }
97
98    out.push_str(&format!("{indent}<section {attrs}>\n"));
99
100    for block in &section.content {
101        write_block_xml(out, block, depth + 1);
102    }
103
104    for child in &section.children {
105        write_section_xml(out, child, depth + 1);
106    }
107
108    out.push_str(&format!("{indent}</section>\n"));
109}
110
111fn write_block_xml(out: &mut String, block: &ContentBlock, depth: usize) {
112    let indent = " ".repeat(depth * 2);
113    match block {
114        ContentBlock::Paragraph(text) => {
115            out.push_str(&format!(
116                "{indent}<paragraph>{}</paragraph>\n",
117                escaping::escape_xml_text(text)
118            ));
119        },
120        ContentBlock::Table(table) => {
121            out.push_str(&format!("{indent}<table"));
122            if let Some(cap) = &table.caption {
123                out.push_str(&format!(" caption=\"{}\"", escaping::escape_xml_attribute(cap)));
124            }
125            out.push_str(">\n");
126            if !table.headers.is_empty() {
127                out.push_str(&format!("{indent}  <headers>\n"));
128                for h in &table.headers {
129                    out.push_str(&format!(
130                        "{indent}    <col>{}</col>\n",
131                        escaping::escape_xml_text(h)
132                    ));
133                }
134                out.push_str(&format!("{indent}  </headers>\n"));
135            }
136            for row in &table.rows {
137                out.push_str(&format!("{indent}  <row>"));
138                for cell in row {
139                    out.push_str(&format!("<cell>{}</cell>", escaping::escape_xml_text(cell)));
140                }
141                out.push_str("</row>\n");
142            }
143            out.push_str(&format!("{indent}</table>\n"));
144        },
145        ContentBlock::List(list) => {
146            let tag = if list.ordered { "ordered_list" } else { "list" };
147            out.push_str(&format!("{indent}<{tag}>\n"));
148            for item in &list.items {
149                if let Some(children) = &item.children {
150                    // Item with nested sub-list
151                    out.push_str(&format!(
152                        "{indent}  <item>{}\n",
153                        escaping::escape_xml_text(&item.text)
154                    ));
155                    write_block_xml(out, &ContentBlock::List(children.clone()), depth + 2);
156                    out.push_str(&format!("{indent}  </item>\n"));
157                } else {
158                    out.push_str(&format!(
159                        "{indent}  <item>{}</item>\n",
160                        escaping::escape_xml_text(&item.text)
161                    ));
162                }
163            }
164            out.push_str(&format!("{indent}</{tag}>\n"));
165        },
166        ContentBlock::CodeBlock(code) => {
167            let lang_attr = code
168                .language
169                .as_ref()
170                .map(|l| format!(" language=\"{}\"", escaping::escape_xml_attribute(l)))
171                .unwrap_or_default();
172            // Escape ]]> sequences in CDATA to prevent injection
173            let safe_content = code.content.replace("]]>", "]]]]><![CDATA[>");
174            out.push_str(&format!(
175                "{indent}<code_block{lang_attr}><![CDATA[{safe_content}]]></code_block>\n",
176            ));
177        },
178        ContentBlock::Definition(def) => {
179            out.push_str(&format!(
180                "{indent}<definition term=\"{}\">{}</definition>\n",
181                escaping::escape_xml_attribute(&def.term),
182                escaping::escape_xml_text(&def.definition)
183            ));
184        },
185        ContentBlock::Blockquote(text) => {
186            out.push_str(&format!(
187                "{indent}<blockquote>{}</blockquote>\n",
188                escaping::escape_xml_text(text)
189            ));
190        },
191        ContentBlock::CrossReference(cr) => {
192            out.push_str(&format!(
193                "{indent}<cross_ref target=\"{}\">{}</cross_ref>\n",
194                escaping::escape_xml_attribute(&cr.target_id),
195                escaping::escape_xml_text(&cr.display_text)
196            ));
197        },
198        ContentBlock::ThematicBreak => {
199            out.push_str(&format!("{indent}<hr/>\n"));
200        },
201        ContentBlock::Raw(text) => {
202            out.push_str(&format!("{indent}<raw>{}</raw>\n", escaping::escape_xml_text(text)));
203        },
204    }
205}
206
207/// Format a document as GPT-optimized Markdown.
208pub fn format_markdown(doc: &Document) -> String {
209    let mut out = String::with_capacity(doc.block_count() * 200);
210
211    // Title and metadata
212    if let Some(title) = &doc.title {
213        out.push_str(&format!("# {title}\n\n"));
214    }
215
216    // Metadata block
217    let m = &doc.metadata;
218    let mut meta_parts = Vec::new();
219    if let Some(v) = &m.version {
220        meta_parts.push(format!("**Version**: {v}"));
221    }
222    if let Some(v) = &m.effective_date {
223        meta_parts.push(format!("**Effective**: {v}"));
224    }
225    if let Some(v) = &m.classification {
226        meta_parts.push(format!("**Classification**: {v}"));
227    }
228    if let Some(v) = &m.author {
229        meta_parts.push(format!("**Author**: {v}"));
230    }
231    if !meta_parts.is_empty() {
232        out.push_str(&format!("> {}\n\n", meta_parts.join(" | ")));
233    }
234
235    // Sections
236    for section in &doc.sections {
237        write_section_md(&mut out, section);
238    }
239
240    out
241}
242
243fn write_section_md(out: &mut String, section: &Section) {
244    if let Some(title) = &section.title {
245        let prefix = "#".repeat(section.level.max(1).min(6) as usize);
246        let number = section
247            .number
248            .as_ref()
249            .map(|n| format!("{n} "))
250            .unwrap_or_default();
251        out.push_str(&format!("{prefix} {number}{title}\n\n"));
252    }
253
254    for block in &section.content {
255        write_block_md(out, block);
256        out.push('\n');
257    }
258
259    for child in &section.children {
260        write_section_md(out, child);
261    }
262}
263
264fn write_block_md(out: &mut String, block: &ContentBlock) {
265    match block {
266        ContentBlock::Paragraph(text) => {
267            out.push_str(text);
268            out.push('\n');
269        },
270        ContentBlock::Table(table) => {
271            if !table.headers.is_empty() {
272                out.push_str("| ");
273                out.push_str(&table.headers.join(" | "));
274                out.push_str(" |\n");
275                out.push('|');
276                for _ in &table.headers {
277                    out.push_str("---|");
278                }
279                out.push('\n');
280            }
281            for row in &table.rows {
282                out.push_str("| ");
283                out.push_str(&row.join(" | "));
284                out.push_str(" |\n");
285            }
286        },
287        ContentBlock::List(list) => {
288            for (i, item) in list.items.iter().enumerate() {
289                if list.ordered {
290                    out.push_str(&format!("{}. {}\n", i + 1, item.text));
291                } else {
292                    out.push_str(&format!("- {}\n", item.text));
293                }
294                if let Some(children) = &item.children {
295                    // Render nested sub-list with indentation
296                    for (j, sub_item) in children.items.iter().enumerate() {
297                        if children.ordered {
298                            out.push_str(&format!("  {}. {}\n", j + 1, sub_item.text));
299                        } else {
300                            out.push_str(&format!("  - {}\n", sub_item.text));
301                        }
302                    }
303                }
304            }
305        },
306        ContentBlock::CodeBlock(code) => {
307            let lang = code.language.as_deref().unwrap_or("");
308            out.push_str(&format!("```{lang}\n{}\n```\n", code.content));
309        },
310        ContentBlock::Definition(def) => {
311            out.push_str(&format!("**{}**: {}\n", def.term, def.definition));
312        },
313        ContentBlock::Blockquote(text) => {
314            for line in text.lines() {
315                out.push_str(&format!("> {line}\n"));
316            }
317        },
318        ContentBlock::CrossReference(cr) => {
319            out.push_str(&format!("[{}]({})\n", cr.display_text, cr.target_id));
320        },
321        ContentBlock::ThematicBreak => {
322            out.push_str("---\n");
323        },
324        ContentBlock::Raw(text) => {
325            out.push_str(text);
326            out.push('\n');
327        },
328    }
329}
330
331/// Format a document as agent-friendly JSON.
332///
333/// Returns an error message in the output if serialization fails (should not
334/// happen for well-formed Document values).
335pub fn format_json(doc: &Document) -> String {
336    match serde_json::to_string_pretty(doc) {
337        Ok(json) => json,
338        Err(e) => {
339            eprintln!("Warning: JSON serialization failed: {e}");
340            String::new()
341        },
342    }
343}
344
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a small fixture: a titled document with one level-1 section
    /// (paragraph + captioned table) that contains one level-2 child section
    /// holding an ordered list.
    fn make_test_doc() -> Document {
        // Child section first, so it can be attached to its parent below.
        let mfa = {
            let mut section = Section::new(2, "MFA Requirements");
            let items = vec![
                ListItem { text: "Hardware key".into(), children: None },
                ListItem { text: "Authenticator app".into(), children: None },
            ];
            section
                .content
                .push(ContentBlock::List(List { ordered: true, items }));
            section
        };

        let access = {
            let mut section = Section::new(1, "Access Control");
            section
                .content
                .push(ContentBlock::Paragraph("All users must authenticate.".into()));
            section.content.push(ContentBlock::Table(Table {
                caption: Some("Access Matrix".into()),
                headers: vec!["Role".into(), "Access".into()],
                rows: vec![vec!["Admin".into(), "Full".into()]],
                alignments: vec![],
            }));
            section.children.push(mfa);
            section
        };

        let mut doc = Document::new("/tmp/test.md", DocumentFormat::Markdown);
        doc.title = Some("Test Policy".into());
        doc.metadata.version = Some("1.0".into());
        doc.metadata.classification = Some("Internal".into());
        doc.sections.push(access);
        doc
    }

    #[test]
    fn test_xml_output() {
        let xml = format_xml(&make_test_doc());
        let expected_fragments = [
            "<document>",
            "<title>Test Policy</title>",
            "<section level=\"1\"",
            "<table",
            "<ordered_list>",
            "</document>",
        ];
        for fragment in expected_fragments {
            assert!(xml.contains(fragment));
        }
    }

    #[test]
    fn test_markdown_output() {
        let md = format_markdown(&make_test_doc());
        let expected_fragments = [
            "# Test Policy",
            "# Access Control",
            "| Role | Access |",
            "1. Hardware key",
        ];
        for fragment in expected_fragments {
            assert!(md.contains(fragment));
        }
    }

    #[test]
    fn test_json_output() {
        let json = format_json(&make_test_doc());
        for fragment in ["\"Test Policy\"", "\"Access Control\""] {
            assert!(json.contains(fragment));
        }
    }
}