Skip to main content

pdf_docx/
writer.rs

1//! DOCX OOXML writer using quick-xml and zip.
2
3use crate::error::Result;
4use crate::layout::{map_font_name, DocxImage, PageElement, Paragraph, Run, Table};
5use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event};
6use quick_xml::Writer;
7use std::io::{Cursor, Write};
8use zip::write::SimpleFileOptions;
9use zip::ZipWriter;
10
/// EMU (English Metric Units) per inch; used when converting image pixel
/// dimensions into DrawingML extents.
const EMU_PER_INCH: i64 = 914400;

/// Default image DPI assumption: the pixels-per-inch value used to turn an
/// image's pixel width/height into a physical size.
const DEFAULT_DPI: f64 = 96.0;
16
17/// Write a complete DOCX file from page elements, returning bytes.
18pub fn write_docx(
19    elements: &[Vec<PageElement>],
20    images: &[DocxImage],
21    output: &mut Vec<u8>,
22) -> Result<()> {
23    let cursor = Cursor::new(Vec::new());
24    let mut zip = ZipWriter::new(cursor);
25    let options = SimpleFileOptions::default().compression_method(zip::CompressionMethod::Deflated);
26
27    // [Content_Types].xml
28    zip.start_file("[Content_Types].xml", options)?;
29    zip.write_all(&write_content_types(images)?)?;
30
31    // _rels/.rels
32    zip.start_file("_rels/.rels", options)?;
33    zip.write_all(&write_root_rels()?)?;
34
35    // word/_rels/document.xml.rels
36    zip.start_file("word/_rels/document.xml.rels", options)?;
37    zip.write_all(&write_document_rels(images)?)?;
38
39    // word/styles.xml
40    zip.start_file("word/styles.xml", options)?;
41    zip.write_all(&write_styles()?)?;
42
43    // word/document.xml
44    zip.start_file("word/document.xml", options)?;
45    zip.write_all(&write_document(elements, images)?)?;
46
47    // word/media/* (images)
48    for img in images {
49        let path = format!("word/media/{}", img.id);
50        zip.start_file(path, options)?;
51        zip.write_all(&img.data)?;
52    }
53
54    let cursor = zip.finish()?;
55    *output = cursor.into_inner();
56    Ok(())
57}
58
59fn write_content_types(images: &[DocxImage]) -> Result<Vec<u8>> {
60    let mut buf = Cursor::new(Vec::new());
61    let mut w = Writer::new_with_indent(&mut buf, b' ', 2);
62
63    w.write_event(Event::Decl(BytesDecl::new(
64        "1.0",
65        Some("UTF-8"),
66        Some("yes"),
67    )))?;
68
69    let mut types = BytesStart::new("Types");
70    types.push_attribute((
71        "xmlns",
72        "http://schemas.openxmlformats.org/package/2006/content-types",
73    ));
74    w.write_event(Event::Start(types))?;
75
76    // Default types
77    write_default(
78        &mut w,
79        "rels",
80        "application/vnd.openxmlformats-package.relationships+xml",
81    )?;
82    write_default(&mut w, "xml", "application/xml")?;
83
84    // Image types
85    let mut seen_ext: std::collections::HashSet<String> = std::collections::HashSet::new();
86    for img in images {
87        let ext = image_extension(&img.content_type);
88        if seen_ext.insert(ext.to_string()) {
89            write_default(&mut w, ext, &img.content_type)?;
90        }
91    }
92
93    // Override for document
94    write_override(
95        &mut w,
96        "/word/document.xml",
97        "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml",
98    )?;
99    write_override(
100        &mut w,
101        "/word/styles.xml",
102        "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml",
103    )?;
104
105    w.write_event(Event::End(BytesEnd::new("Types")))?;
106    Ok(buf.into_inner())
107}
108
109fn write_default(w: &mut Writer<&mut Cursor<Vec<u8>>>, ext: &str, ct: &str) -> Result<()> {
110    let mut el = BytesStart::new("Default");
111    el.push_attribute(("Extension", ext));
112    el.push_attribute(("ContentType", ct));
113    w.write_event(Event::Empty(el))?;
114    Ok(())
115}
116
117fn write_override(w: &mut Writer<&mut Cursor<Vec<u8>>>, part: &str, ct: &str) -> Result<()> {
118    let mut el = BytesStart::new("Override");
119    el.push_attribute(("PartName", part));
120    el.push_attribute(("ContentType", ct));
121    w.write_event(Event::Empty(el))?;
122    Ok(())
123}
124
125fn write_root_rels() -> Result<Vec<u8>> {
126    let mut buf = Cursor::new(Vec::new());
127    let mut w = Writer::new_with_indent(&mut buf, b' ', 2);
128
129    w.write_event(Event::Decl(BytesDecl::new(
130        "1.0",
131        Some("UTF-8"),
132        Some("yes"),
133    )))?;
134
135    let mut rels = BytesStart::new("Relationships");
136    rels.push_attribute((
137        "xmlns",
138        "http://schemas.openxmlformats.org/package/2006/relationships",
139    ));
140    w.write_event(Event::Start(rels))?;
141
142    let mut rel = BytesStart::new("Relationship");
143    rel.push_attribute(("Id", "rId1"));
144    rel.push_attribute((
145        "Type",
146        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument",
147    ));
148    rel.push_attribute(("Target", "word/document.xml"));
149    w.write_event(Event::Empty(rel))?;
150
151    w.write_event(Event::End(BytesEnd::new("Relationships")))?;
152    Ok(buf.into_inner())
153}
154
155fn write_document_rels(images: &[DocxImage]) -> Result<Vec<u8>> {
156    let mut buf = Cursor::new(Vec::new());
157    let mut w = Writer::new_with_indent(&mut buf, b' ', 2);
158
159    w.write_event(Event::Decl(BytesDecl::new(
160        "1.0",
161        Some("UTF-8"),
162        Some("yes"),
163    )))?;
164
165    let mut rels = BytesStart::new("Relationships");
166    rels.push_attribute((
167        "xmlns",
168        "http://schemas.openxmlformats.org/package/2006/relationships",
169    ));
170    w.write_event(Event::Start(rels))?;
171
172    // Styles relationship
173    let mut rel = BytesStart::new("Relationship");
174    rel.push_attribute(("Id", "rId1"));
175    rel.push_attribute((
176        "Type",
177        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles",
178    ));
179    rel.push_attribute(("Target", "styles.xml"));
180    w.write_event(Event::Empty(rel))?;
181
182    // Image relationships
183    for (i, img) in images.iter().enumerate() {
184        let rid = format!("rId{}", i + 2);
185        let target = format!("media/{}", img.id);
186        let mut rel = BytesStart::new("Relationship");
187        rel.push_attribute(("Id", rid.as_str()));
188        rel.push_attribute((
189            "Type",
190            "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image",
191        ));
192        rel.push_attribute(("Target", target.as_str()));
193        w.write_event(Event::Empty(rel))?;
194    }
195
196    w.write_event(Event::End(BytesEnd::new("Relationships")))?;
197    Ok(buf.into_inner())
198}
199
200fn write_styles() -> Result<Vec<u8>> {
201    let mut buf = Cursor::new(Vec::new());
202    let mut w = Writer::new_with_indent(&mut buf, b' ', 2);
203
204    w.write_event(Event::Decl(BytesDecl::new(
205        "1.0",
206        Some("UTF-8"),
207        Some("yes"),
208    )))?;
209
210    let mut styles = BytesStart::new("w:styles");
211    styles.push_attribute((
212        "xmlns:w",
213        "http://schemas.openxmlformats.org/wordprocessingml/2006/main",
214    ));
215    w.write_event(Event::Start(styles))?;
216
217    // Default run properties
218    w.write_event(Event::Start(BytesStart::new("w:docDefaults")))?;
219    w.write_event(Event::Start(BytesStart::new("w:rPrDefault")))?;
220    w.write_event(Event::Start(BytesStart::new("w:rPr")))?;
221
222    let mut font = BytesStart::new("w:rFonts");
223    font.push_attribute(("w:ascii", "Calibri"));
224    font.push_attribute(("w:hAnsi", "Calibri"));
225    w.write_event(Event::Empty(font))?;
226
227    let mut sz = BytesStart::new("w:sz");
228    sz.push_attribute(("w:val", "24")); // 12pt = 24 half-points
229    w.write_event(Event::Empty(sz))?;
230
231    w.write_event(Event::End(BytesEnd::new("w:rPr")))?;
232    w.write_event(Event::End(BytesEnd::new("w:rPrDefault")))?;
233    w.write_event(Event::End(BytesEnd::new("w:docDefaults")))?;
234
235    // Normal style
236    let mut style = BytesStart::new("w:style");
237    style.push_attribute(("w:type", "paragraph"));
238    style.push_attribute(("w:styleId", "Normal"));
239    style.push_attribute(("w:default", "1"));
240    w.write_event(Event::Start(style))?;
241
242    w.write_event(Event::Start(BytesStart::new("w:name")))?;
243    w.write_event(Event::Text(BytesText::new("Normal")))?;
244    w.write_event(Event::End(BytesEnd::new("w:name")))?;
245
246    w.write_event(Event::End(BytesEnd::new("w:style")))?;
247
248    // Heading styles
249    for level in 1..=6u8 {
250        let style_id = format!("Heading{level}");
251        let mut style = BytesStart::new("w:style");
252        style.push_attribute(("w:type", "paragraph"));
253        style.push_attribute(("w:styleId", style_id.as_str()));
254        w.write_event(Event::Start(style))?;
255
256        let name = format!("heading {level}");
257        w.write_event(Event::Start(BytesStart::new("w:name")))?;
258        w.write_event(Event::Text(BytesText::new(&name)))?;
259        w.write_event(Event::End(BytesEnd::new("w:name")))?;
260
261        w.write_event(Event::Start(BytesStart::new("w:rPr")))?;
262        w.write_event(Event::Empty(BytesStart::new("w:b")))?;
263        let size = match level {
264            1 => "48",
265            2 => "36",
266            3 => "28",
267            _ => "24",
268        };
269        let mut sz = BytesStart::new("w:sz");
270        sz.push_attribute(("w:val", size));
271        w.write_event(Event::Empty(sz))?;
272        w.write_event(Event::End(BytesEnd::new("w:rPr")))?;
273
274        w.write_event(Event::End(BytesEnd::new("w:style")))?;
275    }
276
277    w.write_event(Event::End(BytesEnd::new("w:styles")))?;
278    Ok(buf.into_inner())
279}
280
281fn write_document(pages: &[Vec<PageElement>], images: &[DocxImage]) -> Result<Vec<u8>> {
282    let mut buf = Cursor::new(Vec::new());
283    let mut w = Writer::new_with_indent(&mut buf, b' ', 2);
284
285    w.write_event(Event::Decl(BytesDecl::new(
286        "1.0",
287        Some("UTF-8"),
288        Some("yes"),
289    )))?;
290
291    let mut doc = BytesStart::new("w:document");
292    doc.push_attribute((
293        "xmlns:w",
294        "http://schemas.openxmlformats.org/wordprocessingml/2006/main",
295    ));
296    doc.push_attribute((
297        "xmlns:r",
298        "http://schemas.openxmlformats.org/officeDocument/2006/relationships",
299    ));
300    doc.push_attribute((
301        "xmlns:wp",
302        "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing",
303    ));
304    doc.push_attribute((
305        "xmlns:a",
306        "http://schemas.openxmlformats.org/drawingml/2006/main",
307    ));
308    doc.push_attribute((
309        "xmlns:pic",
310        "http://schemas.openxmlformats.org/drawingml/2006/picture",
311    ));
312    w.write_event(Event::Start(doc))?;
313
314    w.write_event(Event::Start(BytesStart::new("w:body")))?;
315
316    let mut img_idx = 0;
317
318    for (page_idx, page_elements) in pages.iter().enumerate() {
319        // Insert a page break before every page except the first.
320        if page_idx > 0 && !page_elements.is_empty() {
321            write_page_break(&mut w)?;
322        }
323
324        for element in page_elements {
325            match element {
326                PageElement::Para(para) => write_paragraph(&mut w, para)?,
327                PageElement::Tbl(table) => write_table(&mut w, table)?,
328                PageElement::Img(img) => {
329                    let rid = format!("rId{}", find_image_rid(images, &img.id) + 2);
330                    write_image_paragraph(&mut w, img, &rid, img_idx)?;
331                    img_idx += 1;
332                }
333            }
334        }
335    }
336
337    w.write_event(Event::End(BytesEnd::new("w:body")))?;
338    w.write_event(Event::End(BytesEnd::new("w:document")))?;
339    Ok(buf.into_inner())
340}
341
342fn write_page_break(w: &mut Writer<&mut Cursor<Vec<u8>>>) -> Result<()> {
343    w.write_event(Event::Start(BytesStart::new("w:p")))?;
344    w.write_event(Event::Start(BytesStart::new("w:r")))?;
345    let mut br = BytesStart::new("w:br");
346    br.push_attribute(("w:type", "page"));
347    w.write_event(Event::Empty(br))?;
348    w.write_event(Event::End(BytesEnd::new("w:r")))?;
349    w.write_event(Event::End(BytesEnd::new("w:p")))?;
350    Ok(())
351}
352
353fn find_image_rid(images: &[DocxImage], id: &str) -> usize {
354    images.iter().position(|img| img.id == id).unwrap_or(0)
355}
356
357/// Detect heading level from paragraph font metrics.
358/// Returns None for normal text, Some(1..=6) for headings.
359fn detect_heading_level(para: &Paragraph) -> Option<u8> {
360    if para.runs.is_empty() {
361        return None;
362    }
363    // Use the largest font size across all runs.
364    let max_size = para
365        .runs
366        .iter()
367        .map(|r| r.font_size)
368        .fold(0.0_f64, f64::max);
369    let all_bold = para.runs.iter().all(|r| r.bold);
370    match () {
371        _ if max_size >= 24.0 => Some(1),
372        _ if max_size >= 18.0 => Some(2),
373        _ if max_size >= 14.0 => Some(3),
374        _ if max_size >= 12.0 && all_bold => Some(4),
375        _ => None,
376    }
377}
378
379fn write_paragraph(w: &mut Writer<&mut Cursor<Vec<u8>>>, para: &Paragraph) -> Result<()> {
380    w.write_event(Event::Start(BytesStart::new("w:p")))?;
381
382    // Apply heading style if detected.
383    if let Some(level) = detect_heading_level(para) {
384        w.write_event(Event::Start(BytesStart::new("w:pPr")))?;
385        let mut pstyle = BytesStart::new("w:pStyle");
386        pstyle.push_attribute(("w:val", format!("Heading{level}").as_str()));
387        w.write_event(Event::Empty(pstyle))?;
388        w.write_event(Event::End(BytesEnd::new("w:pPr")))?;
389    }
390
391    for run in &para.runs {
392        write_run(w, run)?;
393    }
394
395    w.write_event(Event::End(BytesEnd::new("w:p")))?;
396    Ok(())
397}
398
399fn write_run(w: &mut Writer<&mut Cursor<Vec<u8>>>, run: &Run) -> Result<()> {
400    w.write_event(Event::Start(BytesStart::new("w:r")))?;
401
402    // Run properties
403    let has_props = run.bold || run.italic || run.font_size != 12.0 || !run.font_name.is_empty();
404    if has_props {
405        w.write_event(Event::Start(BytesStart::new("w:rPr")))?;
406
407        if !run.font_name.is_empty() {
408            let mapped = map_font_name(&run.font_name);
409            let mut font = BytesStart::new("w:rFonts");
410            font.push_attribute(("w:ascii", mapped));
411            font.push_attribute(("w:hAnsi", mapped));
412            w.write_event(Event::Empty(font))?;
413        }
414
415        if run.bold {
416            w.write_event(Event::Empty(BytesStart::new("w:b")))?;
417        }
418
419        if run.italic {
420            w.write_event(Event::Empty(BytesStart::new("w:i")))?;
421        }
422
423        if (run.font_size - 12.0).abs() > 0.5 {
424            let half_pts = (run.font_size * 2.0).round() as i64;
425            let mut sz = BytesStart::new("w:sz");
426            sz.push_attribute(("w:val", half_pts.to_string().as_str()));
427            w.write_event(Event::Empty(sz))?;
428        }
429
430        w.write_event(Event::End(BytesEnd::new("w:rPr")))?;
431    }
432
433    // Text content — strip XML-invalid control characters (U+0000–U+0008,
434    // U+000B, U+000C, U+000E–U+001F) that would make the DOCX unreadable.
435    let clean_text = sanitize_xml_text(&run.text);
436    let mut t = BytesStart::new("w:t");
437    t.push_attribute(("xml:space", "preserve"));
438    w.write_event(Event::Start(t))?;
439    w.write_event(Event::Text(BytesText::new(&clean_text)))?;
440    w.write_event(Event::End(BytesEnd::new("w:t")))?;
441
442    w.write_event(Event::End(BytesEnd::new("w:r")))?;
443    Ok(())
444}
445
446fn write_table(w: &mut Writer<&mut Cursor<Vec<u8>>>, table: &Table) -> Result<()> {
447    w.write_event(Event::Start(BytesStart::new("w:tbl")))?;
448
449    // Table properties
450    w.write_event(Event::Start(BytesStart::new("w:tblPr")))?;
451
452    let mut style = BytesStart::new("w:tblStyle");
453    style.push_attribute(("w:val", "TableGrid"));
454    w.write_event(Event::Empty(style))?;
455
456    let mut width = BytesStart::new("w:tblW");
457    width.push_attribute(("w:w", "0"));
458    width.push_attribute(("w:type", "auto"));
459    w.write_event(Event::Empty(width))?;
460
461    // Table borders
462    w.write_event(Event::Start(BytesStart::new("w:tblBorders")))?;
463    for border_name in &[
464        "w:top",
465        "w:left",
466        "w:bottom",
467        "w:right",
468        "w:insideH",
469        "w:insideV",
470    ] {
471        let mut b = BytesStart::new(*border_name);
472        b.push_attribute(("w:val", "single"));
473        b.push_attribute(("w:sz", "4"));
474        b.push_attribute(("w:space", "0"));
475        b.push_attribute(("w:color", "auto"));
476        w.write_event(Event::Empty(b))?;
477    }
478    w.write_event(Event::End(BytesEnd::new("w:tblBorders")))?;
479
480    w.write_event(Event::End(BytesEnd::new("w:tblPr")))?;
481
482    // Grid definition
483    w.write_event(Event::Start(BytesStart::new("w:tblGrid")))?;
484    let col_width = 9000 / table.col_count.max(1);
485    for _ in 0..table.col_count {
486        let mut gc = BytesStart::new("w:gridCol");
487        gc.push_attribute(("w:w", col_width.to_string().as_str()));
488        w.write_event(Event::Empty(gc))?;
489    }
490    w.write_event(Event::End(BytesEnd::new("w:tblGrid")))?;
491
492    // Rows
493    for row in &table.rows {
494        w.write_event(Event::Start(BytesStart::new("w:tr")))?;
495
496        for cell_text in row {
497            w.write_event(Event::Start(BytesStart::new("w:tc")))?;
498            w.write_event(Event::Start(BytesStart::new("w:p")))?;
499            w.write_event(Event::Start(BytesStart::new("w:r")))?;
500
501            let clean_cell = sanitize_xml_text(cell_text);
502            let mut t = BytesStart::new("w:t");
503            t.push_attribute(("xml:space", "preserve"));
504            w.write_event(Event::Start(t))?;
505            w.write_event(Event::Text(BytesText::new(&clean_cell)))?;
506            w.write_event(Event::End(BytesEnd::new("w:t")))?;
507
508            w.write_event(Event::End(BytesEnd::new("w:r")))?;
509            w.write_event(Event::End(BytesEnd::new("w:p")))?;
510            w.write_event(Event::End(BytesEnd::new("w:tc")))?;
511        }
512
513        w.write_event(Event::End(BytesEnd::new("w:tr")))?;
514    }
515
516    w.write_event(Event::End(BytesEnd::new("w:tbl")))?;
517    Ok(())
518}
519
520fn write_image_paragraph(
521    w: &mut Writer<&mut Cursor<Vec<u8>>>,
522    img: &DocxImage,
523    rid: &str,
524    idx: usize,
525) -> Result<()> {
526    let cx = (img.width as f64 / DEFAULT_DPI * EMU_PER_INCH as f64) as i64;
527    let cy = (img.height as f64 / DEFAULT_DPI * EMU_PER_INCH as f64) as i64;
528    let cx_str = cx.to_string();
529    let cy_str = cy.to_string();
530    let id_str = (idx + 1).to_string();
531    let name = format!("Image{}", idx + 1);
532
533    w.write_event(Event::Start(BytesStart::new("w:p")))?;
534    w.write_event(Event::Start(BytesStart::new("w:r")))?;
535    w.write_event(Event::Start(BytesStart::new("w:drawing")))?;
536
537    // Inline drawing
538    let mut inline = BytesStart::new("wp:inline");
539    inline.push_attribute(("distT", "0"));
540    inline.push_attribute(("distB", "0"));
541    inline.push_attribute(("distL", "0"));
542    inline.push_attribute(("distR", "0"));
543    w.write_event(Event::Start(inline))?;
544
545    // Extent
546    let mut extent = BytesStart::new("wp:extent");
547    extent.push_attribute(("cx", cx_str.as_str()));
548    extent.push_attribute(("cy", cy_str.as_str()));
549    w.write_event(Event::Empty(extent))?;
550
551    // DocPr
552    let mut doc_pr = BytesStart::new("wp:docPr");
553    doc_pr.push_attribute(("id", id_str.as_str()));
554    doc_pr.push_attribute(("name", name.as_str()));
555    w.write_event(Event::Empty(doc_pr))?;
556
557    // Graphic
558    let mut graphic = BytesStart::new("a:graphic");
559    graphic.push_attribute((
560        "xmlns:a",
561        "http://schemas.openxmlformats.org/drawingml/2006/main",
562    ));
563    w.write_event(Event::Start(graphic))?;
564
565    let mut gd = BytesStart::new("a:graphicData");
566    gd.push_attribute((
567        "uri",
568        "http://schemas.openxmlformats.org/drawingml/2006/picture",
569    ));
570    w.write_event(Event::Start(gd))?;
571
572    let mut pic = BytesStart::new("pic:pic");
573    pic.push_attribute((
574        "xmlns:pic",
575        "http://schemas.openxmlformats.org/drawingml/2006/picture",
576    ));
577    w.write_event(Event::Start(pic))?;
578
579    // Non-visual properties
580    w.write_event(Event::Start(BytesStart::new("pic:nvPicPr")))?;
581
582    let mut cnv = BytesStart::new("pic:cNvPr");
583    cnv.push_attribute(("id", id_str.as_str()));
584    cnv.push_attribute(("name", name.as_str()));
585    w.write_event(Event::Empty(cnv))?;
586
587    w.write_event(Event::Empty(BytesStart::new("pic:cNvPicPr")))?;
588
589    w.write_event(Event::End(BytesEnd::new("pic:nvPicPr")))?;
590
591    // Blip fill
592    w.write_event(Event::Start(BytesStart::new("pic:blipFill")))?;
593
594    let mut blip = BytesStart::new("a:blip");
595    blip.push_attribute(("r:embed", rid));
596    w.write_event(Event::Empty(blip))?;
597
598    w.write_event(Event::Start(BytesStart::new("a:stretch")))?;
599    w.write_event(Event::Empty(BytesStart::new("a:fillRect")))?;
600    w.write_event(Event::End(BytesEnd::new("a:stretch")))?;
601
602    w.write_event(Event::End(BytesEnd::new("pic:blipFill")))?;
603
604    // Shape properties
605    w.write_event(Event::Start(BytesStart::new("pic:spPr")))?;
606
607    let xfrm = BytesStart::new("a:xfrm");
608    w.write_event(Event::Start(xfrm))?;
609
610    let mut off = BytesStart::new("a:off");
611    off.push_attribute(("x", "0"));
612    off.push_attribute(("y", "0"));
613    w.write_event(Event::Empty(off))?;
614
615    let mut ext = BytesStart::new("a:ext");
616    ext.push_attribute(("cx", cx_str.as_str()));
617    ext.push_attribute(("cy", cy_str.as_str()));
618    w.write_event(Event::Empty(ext))?;
619
620    w.write_event(Event::End(BytesEnd::new("a:xfrm")))?;
621
622    let mut prst = BytesStart::new("a:prstGeom");
623    prst.push_attribute(("prst", "rect"));
624    w.write_event(Event::Start(prst))?;
625    w.write_event(Event::Empty(BytesStart::new("a:avLst")))?;
626    w.write_event(Event::End(BytesEnd::new("a:prstGeom")))?;
627
628    w.write_event(Event::End(BytesEnd::new("pic:spPr")))?;
629
630    w.write_event(Event::End(BytesEnd::new("pic:pic")))?;
631    w.write_event(Event::End(BytesEnd::new("a:graphicData")))?;
632    w.write_event(Event::End(BytesEnd::new("a:graphic")))?;
633    w.write_event(Event::End(BytesEnd::new("wp:inline")))?;
634    w.write_event(Event::End(BytesEnd::new("w:drawing")))?;
635    w.write_event(Event::End(BytesEnd::new("w:r")))?;
636    w.write_event(Event::End(BytesEnd::new("w:p")))?;
637    Ok(())
638}
639
/// Remove every character that is not a legal XML 1.0 `Char`.
///
/// XML 1.0 permits only tab (U+0009), LF (U+000A), CR (U+000D),
/// U+0020–U+D7FF, U+E000–U+FFFD and U+10000–U+10FFFF. This strips the C0
/// control characters U+0000–U+0008, U+000B, U+000C, U+000E–U+001F as well
/// as the non-characters U+FFFE and U+FFFF. (Surrogates cannot occur in a
/// Rust `str`.)
///
/// Returns an owned copy of `text` with the forbidden characters removed;
/// all other characters are kept in their original order.
fn sanitize_xml_text(text: &str) -> String {
    /// True when `c` matches the XML 1.0 `Char` production.
    fn is_xml_char(c: char) -> bool {
        matches!(
            c,
            '\t' | '\n' | '\r' | '\u{20}'..='\u{FFFD}' | '\u{10000}'..='\u{10FFFF}'
        )
    }

    // Common case: nothing to strip, so copy the string in one go.
    if text.chars().all(is_xml_char) {
        return text.to_string();
    }
    text.chars().filter(|&c| is_xml_char(c)).collect()
}
656
/// Map an image MIME type to the file extension used for it.
///
/// Recognizes `image/jpeg`, `image/png`, `image/gif` and `image/tiff`
/// (exact, case-sensitive match); every other value maps to `"png"`.
fn image_extension(content_type: &str) -> &str {
    const KNOWN: [(&str, &str); 4] = [
        ("image/jpeg", "jpeg"),
        ("image/png", "png"),
        ("image/gif", "gif"),
        ("image/tiff", "tiff"),
    ];

    for (mime, extension) in KNOWN {
        if mime == content_type {
            return extension;
        }
    }
    "png"
}
666
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a run with the given text and formatting.
    fn run(text: &str, font_name: &str, font_size: f64, bold: bool, italic: bool) -> Run {
        Run {
            text: text.to_string(),
            font_name: font_name.to_string(),
            font_size,
            bold,
            italic,
        }
    }

    /// Render a single-page, image-free document and return the archive bytes.
    fn render(element: PageElement) -> Vec<u8> {
        let mut output = Vec::new();
        write_docx(&[vec![element]], &[], &mut output).unwrap();
        output
    }

    #[test]
    fn write_minimal_docx() {
        let para = Paragraph {
            runs: vec![run("Hello World", "", 12.0, false, false)],
        };

        let output = render(PageElement::Para(para));

        // Verify it's a valid ZIP file.
        assert!(output.len() > 100);
        assert_eq!(&output[0..2], b"PK");
    }

    #[test]
    fn write_docx_with_table() {
        let cells = [["A", "B"], ["1", "2"]];
        let table = Table {
            rows: cells
                .iter()
                .map(|row| row.iter().map(|cell| cell.to_string()).collect())
                .collect(),
            col_count: 2,
        };

        let output = render(PageElement::Tbl(table));
        assert!(output.len() > 100);
    }

    #[test]
    fn write_docx_with_formatting() {
        let para = Paragraph {
            runs: vec![
                run("Bold ", "Arial-Bold", 14.0, true, false),
                run("Italic", "Times-Italic", 12.0, false, true),
            ],
        };

        let output = render(PageElement::Para(para));
        assert!(output.len() > 100);
    }
}