use std::io::Write;
/// Assembles an in-memory DOCX package around the given main document XML.
///
/// The archive holds three Deflate-compressed entries, written in this order:
/// `[Content_Types].xml`, `_rels/.rels` and `word/document.xml`. The first two
/// are fixed boilerplate; the third is `document_xml` written verbatim.
///
/// Returns the raw bytes of the finished ZIP archive.
///
/// # Panics
///
/// Panics if any archive operation returns an error.
fn make_docx(document_xml: &str) -> Vec<u8> {
    // Content-type declarations for the package; registers the main document part.
    const CONTENT_TYPES_XML: &[u8] = br#"<?xml version="1.0" encoding="UTF-8"?>
<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
<Default Extension="xml" ContentType="application/xml"/>
<Override PartName="/word/document.xml"
ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>
</Types>"#;
    // Package-level relationships; points `rId1` at `word/document.xml`.
    const PACKAGE_RELS_XML: &[u8] = br#"<?xml version="1.0" encoding="UTF-8"?>
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
<Relationship Id="rId1"
Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument"
Target="word/document.xml"/>
</Relationships>"#;

    let deflate = zip::write::SimpleFileOptions::default()
        .compression_method(zip::CompressionMethod::Deflated);
    let mut archive = zip::ZipWriter::new(std::io::Cursor::new(Vec::new()));

    // Entry order matches the order in which the parts are emitted into the archive.
    let entries: [(&str, &[u8]); 3] = [
        ("[Content_Types].xml", CONTENT_TYPES_XML),
        ("_rels/.rels", PACKAGE_RELS_XML),
        ("word/document.xml", document_xml.as_bytes()),
    ];
    for (entry_name, payload) in entries {
        archive.start_file(entry_name, deflate).unwrap();
        archive.write_all(payload).unwrap();
    }

    archive.finish().unwrap().into_inner()
}
/// Wraps `body_content` in a minimal WordprocessingML document and packages it as a DOCX.
///
/// The fragment is placed verbatim between `<w:body>` and `</w:body>`; no
/// escaping or validation is applied here. The resulting XML is handed to
/// `make_docx`, whose bytes are returned.
fn simple_docx(body_content: &str) -> Vec<u8> {
    // Everything that precedes the caller-supplied body markup.
    const PROLOGUE: &str = r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:body>"#;
    // Everything that follows it.
    const EPILOGUE: &str = r#"</w:body>
</w:document>"#;

    let document_xml = [PROLOGUE, body_content, EPILOGUE].concat();
    make_docx(&document_xml)
}
/// A single-paragraph document converts to output that starts with the `%PDF-` marker.
#[test]
fn convert_simple_docx_to_pdf() {
    const BODY: &str = r#"<w:p><w:r><w:t>Hello World</w:t></w:r></w:p>"#;

    let input = simple_docx(BODY);
    let pdf = dxpdf::convert(&input).unwrap();

    // The length check comes first so the slice below cannot go out of bounds.
    let header = b"%PDF-";
    assert!(pdf.len() > 4);
    assert_eq!(&pdf[..5], header);
}
/// A document with run and paragraph formatting converts to output starting with `%PDF-`.
///
/// The body holds a centre-justified paragraph whose run sets bold, italic,
/// size, colour and an ASCII font, followed by an unformatted paragraph.
#[test]
fn convert_formatted_docx_to_pdf() {
    let docx = simple_docx(
        r#"<w:p>
<w:pPr><w:jc w:val="center"/></w:pPr>
<w:r>
<w:rPr>
<w:b/>
<w:i/>
<w:sz w:val="36"/>
<w:color w:val="0000FF"/>
<w:rFonts w:ascii="Times New Roman"/>
</w:rPr>
<w:t>Formatted Title</w:t>
</w:r>
</w:p>
<w:p>
<w:r><w:t>Normal paragraph text.</w:t></w:r>
</w:p>"#,
    );
    let pdf = dxpdf::convert(&docx).unwrap();
    // Checked `get` rather than `[..5]`: output shorter than the header then
    // fails this assertion with a readable diff instead of an indexing panic.
    assert_eq!(pdf.get(..5), Some(&b"%PDF-"[..]));
}
/// A document containing a 2×2 table converts to output starting with `%PDF-`.
///
/// Each of the four cells holds one paragraph with a single text run.
#[test]
fn convert_table_docx_to_pdf() {
    let docx = simple_docx(
        r#"<w:tbl>
<w:tr>
<w:tc><w:p><w:r><w:t>Cell A1</w:t></w:r></w:p></w:tc>
<w:tc><w:p><w:r><w:t>Cell B1</w:t></w:r></w:p></w:tc>
</w:tr>
<w:tr>
<w:tc><w:p><w:r><w:t>Cell A2</w:t></w:r></w:p></w:tc>
<w:tc><w:p><w:r><w:t>Cell B2</w:t></w:r></w:p></w:tc>
</w:tr>
</w:tbl>"#,
    );
    let pdf = dxpdf::convert(&docx).unwrap();
    // Checked `get` rather than `[..5]`: output shorter than the header then
    // fails this assertion with a readable diff instead of an indexing panic.
    assert_eq!(pdf.get(..5), Some(&b"%PDF-"[..]));
}
/// A document whose `<w:body>` is empty still converts to output starting with `%PDF-`.
#[test]
fn convert_empty_document() {
    let docx = simple_docx("");
    let pdf = dxpdf::convert(&docx).unwrap();
    // Checked `get` rather than `[..5]`: if the converter emits fewer than
    // five bytes for an empty body, this fails as an assertion with a readable
    // diff instead of an out-of-bounds panic.
    assert_eq!(pdf.get(..5), Some(&b"%PDF-"[..]));
}
/// Converted output written to a file in a temporary directory reads back unchanged.
#[test]
fn convert_writes_to_file() {
    let source = simple_docx(r#"<w:p><w:r><w:t>File test</w:t></w:r></w:p>"#);
    let expected = dxpdf::convert(&source).unwrap();

    let scratch = tempfile::tempdir().unwrap();
    let target = scratch.path().join("test.pdf");
    std::fs::write(&target, &expected).unwrap();

    let on_disk = std::fs::read(&target).unwrap();
    assert_eq!(on_disk, expected);
}
/// Input that is not a ZIP archive makes `convert` return an error.
#[test]
fn parse_invalid_zip_returns_error() {
    let garbage = b"not a zip file";
    assert!(dxpdf::convert(garbage).is_err());
}
/// A well-formed ZIP with no DOCX parts is rejected with a "missing required part" error.
#[test]
fn parse_zip_without_document_xml_returns_error() {
    // Build an archive whose only entry is an unrelated text file.
    let mut archive = zip::ZipWriter::new(std::io::Cursor::new(Vec::new()));
    archive
        .start_file("dummy.txt", zip::write::SimpleFileOptions::default())
        .unwrap();
    archive.write_all(b"hello").unwrap();
    let not_a_docx = archive.finish().unwrap().into_inner();

    let outcome = dxpdf::convert(&not_a_docx);
    assert!(outcome.is_err());

    // The error text must identify the problem, not merely signal failure.
    let err = outcome.unwrap_err().to_string();
    assert!(
        err.contains("missing required part"),
        "Error should mention a missing part: {err}"
    );
}
/// A run holding only a space marked `xml:space="preserve"` is kept by the parser.
///
/// The paragraph has three runs ("Label:", a single space, "Value"). After
/// parsing, the first text element of each text run is collected and must
/// equal exactly those three strings in order.
#[test]
fn whitespace_only_run_with_xml_space_preserve_roundtrips_to_space() {
    use dxpdf::model::{Block, Inline, RunElement};

    let docx = simple_docx(
        r#"<w:p>
<w:r><w:rPr><w:b/></w:rPr><w:t>Label:</w:t></w:r>
<w:r><w:t xml:space="preserve"> </w:t></w:r>
<w:r><w:t>Value</w:t></w:r>
</w:p>"#,
    );
    let document = dxpdf::docx::parse(&docx).expect("parse");

    let other = document.body.first().expect("at least one block");
    let Block::Paragraph(para) = other else {
        panic!("expected paragraph, got {other:?}");
    };

    // Gather the first text element of every text run; other inlines and runs
    // without any text element contribute nothing.
    let mut texts: Vec<&str> = Vec::new();
    for inline in para.content.iter() {
        if let Inline::TextRun(run) = inline {
            let first_text = run.content.iter().find_map(|element| {
                if let RunElement::Text(text) = element {
                    Some(text.as_str())
                } else {
                    None
                }
            });
            if let Some(text) = first_text {
                texts.push(text);
            }
        }
    }

    assert_eq!(
        texts,
        ["Label:", " ", "Value"],
        "whitespace-only run must survive parsing as a literal space"
    );
}
/// Fifty spaced paragraphs convert to output over 100 bytes that starts with `%PDF-`.
///
/// Each paragraph declares `w:before`/`w:after` spacing of 100 and carries a
/// numbered line of text.
#[test]
fn convert_multi_paragraph_docx() {
    let body: String = (0..50)
        .map(|i| {
            format!(
                r#"<w:p>
<w:pPr><w:spacing w:before="100" w:after="100"/></w:pPr>
<w:r><w:t>Paragraph number {i} with some text content to make it wider.</w:t></w:r>
</w:p>"#
            )
        })
        .collect();
    let docx = simple_docx(&body);
    let pdf = dxpdf::convert(&docx).unwrap();
    // Size check first: it also guarantees the five-byte slice below is in
    // bounds, so truncated output fails here rather than as an indexing panic.
    assert!(pdf.len() > 100);
    assert_eq!(&pdf[..5], b"%PDF-");
}
/// Rows declaring `gridBefore`/`gridAfter` place their cells on the shared table grid.
///
/// The table has a fixed layout with four grid columns (38, 2905, 6872, 250).
/// Both rows declare `gridBefore=1` and `gridAfter=1` and contain two cells of
/// width 2905 and 6872. After layout, the test locates the text draw command
/// for each cell and checks that:
///
/// * the left cells of both rows share an x position, as do the right cells;
/// * the two cells of a row share a y position, and row 1 lies below row 0;
/// * the right cell starts more than 50 units to the right of the left cell.
#[test]
fn grid_before_offsets_each_row_first_cell() {
    use dxpdf::render::layout::draw_command::DrawCommand;

    // Single tolerance for every "same coordinate" comparison. Laid-out
    // positions are `f32`, so both axes are compared approximately rather
    // than with exact equality.
    const EPSILON: f32 = 0.01;

    let docx = simple_docx(
        r#"<w:tbl>
<w:tblPr>
<w:tblW w:w="10065" w:type="dxa"/>
<w:tblLayout w:type="fixed"/>
</w:tblPr>
<w:tblGrid>
<w:gridCol w:w="38"/>
<w:gridCol w:w="2905"/>
<w:gridCol w:w="6872"/>
<w:gridCol w:w="250"/>
</w:tblGrid>
<w:tr>
<w:trPr>
<w:gridBefore w:val="1"/><w:gridAfter w:val="1"/>
<w:wBefore w:w="38" w:type="dxa"/>
<w:wAfter w:w="250" w:type="dxa"/>
</w:trPr>
<w:tc>
<w:tcPr><w:tcW w:w="2905" w:type="dxa"/></w:tcPr>
<w:p><w:r><w:t>LeftA</w:t></w:r></w:p>
</w:tc>
<w:tc>
<w:tcPr><w:tcW w:w="6872" w:type="dxa"/></w:tcPr>
<w:p><w:r><w:t>RightA</w:t></w:r></w:p>
</w:tc>
</w:tr>
<w:tr>
<w:trPr>
<w:gridBefore w:val="1"/><w:gridAfter w:val="1"/>
<w:wBefore w:w="38" w:type="dxa"/>
<w:wAfter w:w="250" w:type="dxa"/>
</w:trPr>
<w:tc>
<w:tcPr><w:tcW w:w="2905" w:type="dxa"/></w:tcPr>
<w:p><w:r><w:t>LeftB</w:t></w:r></w:p>
</w:tc>
<w:tc>
<w:tcPr><w:tcW w:w="6872" w:type="dxa"/></w:tcPr>
<w:p><w:r><w:t>RightB</w:t></w:r></w:p>
</w:tc>
</w:tr>
</w:tbl>"#,
    );
    let document = dxpdf::docx::parse(&docx).expect("parse");
    let (_, pages) = dxpdf::render::resolve_and_layout(&document);

    // Flatten the draw commands of every page into one searchable list.
    let cmds: Vec<&DrawCommand> = pages.iter().flat_map(|p| p.commands.iter()).collect();

    // Position of the first text command whose text equals `needle`.
    let position_of = |needle: &str| -> Option<(f32, f32)> {
        cmds.iter().find_map(|c| match c {
            DrawCommand::Text { position, text, .. } if text.as_ref() == needle => {
                Some((position.x.raw(), position.y.raw()))
            }
            _ => None,
        })
    };
    let same = |a: f32, b: f32| (a - b).abs() < EPSILON;

    let (x_la, y_la) = position_of("LeftA").expect("LeftA present");
    let (x_ra, y_ra) = position_of("RightA").expect("RightA present");
    let (x_lb, y_lb) = position_of("LeftB").expect("LeftB present");
    let (x_rb, y_rb) = position_of("RightB").expect("RightB present");

    assert!(
        same(x_la, x_lb),
        "LeftA ({x_la}) and LeftB ({x_lb}) must share the same x — both \
         rows declare gridBefore=1 so the first cell starts at the same \
         absolute grid column"
    );
    assert!(
        same(x_ra, x_rb),
        "RightA ({x_ra}) and RightB ({x_rb}) must share the same x"
    );
    assert!(same(y_la, y_ra), "row 0 cells share the same y baseline");
    assert!(same(y_lb, y_rb), "row 1 cells share the same y baseline");
    assert!(y_lb > y_la, "row 1 sits below row 0");
    assert!(
        x_ra - x_la > 50.0,
        "RightA ({x_ra}) must be well to the right of LeftA ({x_la}) — \
         small separation indicates gridBefore was ignored and the right \
         column overlapped the left"
    );
}