use std::io::Write;
use anyhow::Result;
use zip::write::SimpleFileOptions;
use crate::manuscript::{header_keyword, is_scene_break, round_word_count, ManuscriptChapter, ManuscriptMeta};
/// Typeface used for the generated document's default run font.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DocxFont {
    /// Rendered as "Times New Roman".
    TimesNewRoman,
    /// Rendered as "Courier New".
    Courier,
}

impl DocxFont {
    /// Parses a user-supplied font name.
    ///
    /// Matching ignores surrounding whitespace, ASCII case, and any spaces,
    /// hyphens or underscores inside the name, so `"Times New Roman"`,
    /// `"times-new-roman"` and `"TIMES_NEW_ROMAN"` are all equivalent.
    /// Returns `None` when the normalised name is not a known alias.
    pub fn parse(s: &str) -> Option<Self> {
        const TIMES_ALIASES: [&str; 3] = ["times", "timesnewroman", "serif"];
        const COURIER_ALIASES: [&str; 4] = ["courier", "couriernew", "mono", "monospace"];

        // Normalise in one pass: drop separators, fold ASCII case.
        let key: String = s
            .trim()
            .chars()
            .filter(|c| !matches!(c, ' ' | '-' | '_'))
            .map(|c| c.to_ascii_lowercase())
            .collect();

        if TIMES_ALIASES.contains(&key.as_str()) {
            Some(Self::TimesNewRoman)
        } else if COURIER_ALIASES.contains(&key.as_str()) {
            Some(Self::Courier)
        } else {
            None
        }
    }

    /// Font family name as written into the `w:rFonts` attributes.
    fn name(self) -> &'static str {
        if self == Self::TimesNewRoman {
            "Times New Roman"
        } else {
            "Courier New"
        }
    }
}
/// 12 pt expressed in half-points, the unit used by `w:sz` / `w:szCs`.
const HALF_PT_12: &str = "24";
/// Twips per inch; used for the one-inch page margins.
const TWIPS_INCH: u32 = 1440;
/// `w:spacing/@w:line` value for double spacing under `lineRule="auto"`
/// (twice the single-spacing value of 240 used elsewhere in this file).
const DOUBLE_LINE: &str = "480";
/// Assembles a complete `.docx` package in memory and returns its bytes.
///
/// The archive contains six parts, written in this order: the content-type
/// map, the package relationships, the document relationships, the styles
/// part (carrying `font`), the running header built from the author surname
/// and title, and the main document built from `meta` and `chapters`.
///
/// # Errors
///
/// Returns an error if the zip writer fails to start an entry, write its
/// body, or finalise the archive.
pub fn build_docx(
    meta: &ManuscriptMeta,
    chapters: &[ManuscriptChapter],
    font: DocxFont,
) -> Result<Vec<u8>> {
    // Render the generated parts first so the archive can be described as a
    // plain table of (entry name, body) pairs.
    let styles = styles_xml(font);
    let header = header_xml(&meta.surname, &meta.title);
    let document = document_xml(meta, chapters);

    let parts: [(&str, &str); 6] = [
        ("[Content_Types].xml", CONTENT_TYPES),
        ("_rels/.rels", ROOT_RELS),
        ("word/_rels/document.xml.rels", DOC_RELS),
        ("word/styles.xml", styles.as_str()),
        ("word/header2.xml", header.as_str()),
        ("word/document.xml", document.as_str()),
    ];

    let mut archive_bytes: Vec<u8> = Vec::new();
    {
        // Scoped so the writer's mutable borrow of `archive_bytes` ends
        // before the buffer is returned.
        let mut writer = zip::ZipWriter::new(std::io::Cursor::new(&mut archive_bytes));
        let options = SimpleFileOptions::default();
        for (entry_name, body) in parts {
            writer.start_file(entry_name, options)?;
            writer.write_all(body.as_bytes())?;
        }
        writer.finish()?;
    }
    Ok(archive_bytes)
}
fn document_xml(meta: &ManuscriptMeta, chapters: &[ManuscriptChapter]) -> String {
let mut b = String::new();
b.push_str(
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n\
<w:document xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" \
xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\">\n<w:body>\n",
);
for line in meta.contact.lines() {
b.push_str(¶(line, &[Prop::SingleSpace, Prop::Left]));
}
b.push_str(¶(
&format!("approx. {} words", round_word_count(meta.word_count)),
&[Prop::SingleSpace, Prop::Right],
));
for _ in 0..8 {
b.push_str(¶("", &[Prop::SingleSpace, Prop::Center]));
}
b.push_str(¶(&meta.title.to_uppercase(), &[Prop::Center]));
b.push_str(¶("", &[Prop::Center]));
b.push_str(¶(&format!("by {}", meta.byline), &[Prop::Center]));
for ch in chapters {
b.push_str(¶(&ch.title.to_uppercase(), &[Prop::PageBreakBefore, Prop::Center]));
b.push_str(¶("", &[Prop::Center]));
for p in &ch.paragraphs {
if is_scene_break(p) {
b.push_str(¶("#", &[Prop::Center]));
} else {
b.push_str(¶(p, &[Prop::FirstLineIndent]));
}
}
}
b.push_str(&format!(
"<w:sectPr>\
<w:headerReference w:type=\"default\" r:id=\"rIdHeader\"/>\
<w:titlePg/>\
<w:pgSz w:w=\"12240\" w:h=\"15840\"/>\
<w:pgMar w:top=\"{m}\" w:right=\"{m}\" w:bottom=\"{m}\" w:left=\"{m}\" \
w:header=\"720\" w:footer=\"720\" w:gutter=\"0\"/>\
</w:sectPr>\n",
m = TWIPS_INCH,
));
b.push_str("</w:body>\n</w:document>\n");
b
}
/// Paragraph-level formatting switches understood by `para`.
///
/// Derives the same value-type traits as `DocxFont` so variants can be
/// compared, copied and printed in diagnostics.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Prop {
    /// Single line spacing (`w:line="240"`), overriding the document default.
    SingleSpace,
    /// Left justification.
    Left,
    /// Right justification.
    Right,
    /// Centred justification.
    Center,
    /// Indent the first line of the paragraph by 720 twips.
    FirstLineIndent,
    /// Start the paragraph on a new page.
    PageBreakBefore,
}
/// Renders one `<w:p>` element followed by a newline.
///
/// `props` selects the paragraph properties. They are always emitted in the
/// fixed order page-break, spacing, indent, justification, regardless of
/// their order in the slice. When several justification props are present
/// the first one in the slice wins. The `<w:pPr>` wrapper is omitted when no
/// property applies, and the text run is omitted when `text` is empty.
/// `text` is XML-escaped before being written.
fn para(text: &str, props: &[Prop]) -> String {
    let mut page_break = false;
    let mut single_spaced = false;
    let mut indent_first_line = false;
    let mut justification: Option<&'static str> = None;

    // One exhaustive pass over the requested properties.
    for prop in props {
        match prop {
            Prop::PageBreakBefore => page_break = true,
            Prop::SingleSpace => single_spaced = true,
            Prop::FirstLineIndent => indent_first_line = true,
            // `get_or_insert` keeps the first justification that was seen.
            Prop::Left => {
                justification.get_or_insert("left");
            }
            Prop::Right => {
                justification.get_or_insert("right");
            }
            Prop::Center => {
                justification.get_or_insert("center");
            }
        }
    }

    let mut xml = String::from("<w:p>");

    let has_properties =
        page_break || single_spaced || indent_first_line || justification.is_some();
    if has_properties {
        xml.push_str("<w:pPr>");
        if page_break {
            xml.push_str("<w:pageBreakBefore/>");
        }
        if single_spaced {
            xml.push_str("<w:spacing w:line=\"240\" w:lineRule=\"auto\"/>");
        }
        if indent_first_line {
            xml.push_str("<w:ind w:firstLine=\"720\"/>");
        }
        if let Some(side) = justification {
            xml.push_str("<w:jc w:val=\"");
            xml.push_str(side);
            xml.push_str("\"/>");
        }
        xml.push_str("</w:pPr>");
    }

    if !text.is_empty() {
        xml.push_str("<w:r><w:t xml:space=\"preserve\">");
        xml.push_str(&xml_escape(text));
        xml.push_str("</w:t></w:r>");
    }

    xml.push_str("</w:p>\n");
    xml
}
/// Builds the running-header part (`word/header2.xml`).
///
/// The header is a single right-aligned paragraph: the XML-escaped label
/// `"<surname> / <keyword> / "`, where the keyword comes from
/// `header_keyword(title)`, followed by a `PAGE` field made of a
/// begin / instruction / end run triple.
fn header_xml(surname: &str, title: &str) -> String {
    let keyword = header_keyword(title);
    let label = xml_escape(&format!("{surname} / {keyword} / "));

    [
        "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n",
        "<w:hdr xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\">",
        "<w:p><w:pPr><w:jc w:val=\"right\"/></w:pPr>",
        "<w:r><w:t xml:space=\"preserve\">",
        label.as_str(),
        "</w:t></w:r>",
        "<w:r><w:fldChar w:fldCharType=\"begin\"/></w:r>",
        "<w:r><w:instrText xml:space=\"preserve\"> PAGE </w:instrText></w:r>",
        "<w:r><w:fldChar w:fldCharType=\"end\"/></w:r>",
        "</w:p></w:hdr>\n",
    ]
    .concat()
}
/// Builds `word/styles.xml`.
///
/// The document defaults set `font` for the ASCII, high-ANSI and
/// complex-script slots at the size in `HALF_PT_12`, and set the default
/// paragraph line spacing to `DOUBLE_LINE`. A bare default paragraph style
/// named "Normal" is declared after the defaults.
fn styles_xml(font: DocxFont) -> String {
    let run_defaults = format!(
        "<w:rPrDefault><w:rPr>\
         <w:rFonts w:ascii=\"{face}\" w:hAnsi=\"{face}\" w:cs=\"{face}\"/>\
         <w:sz w:val=\"{size}\"/><w:szCs w:val=\"{size}\"/>\
         </w:rPr></w:rPrDefault>",
        face = font.name(),
        size = HALF_PT_12,
    );
    let paragraph_defaults = format!(
        "<w:pPrDefault><w:pPr>\
         <w:spacing w:line=\"{spacing}\" w:lineRule=\"auto\"/>\
         </w:pPr></w:pPrDefault>",
        spacing = DOUBLE_LINE,
    );

    [
        "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n",
        "<w:styles xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\">",
        "<w:docDefaults>",
        run_defaults.as_str(),
        paragraph_defaults.as_str(),
        "</w:docDefaults>",
        "<w:style w:type=\"paragraph\" w:default=\"1\" w:styleId=\"Normal\">",
        "<w:name w:val=\"Normal\"/></w:style>",
        "</w:styles>\n",
    ]
    .concat()
}
/// Body of `[Content_Types].xml`: default types for `.rels` and `.xml`
/// entries plus explicit overrides for the document, styles and header parts.
const CONTENT_TYPES: &str = concat!(
    r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>"#,
    "\n",
    r#"<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">"#,
    r#"<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>"#,
    r#"<Default Extension="xml" ContentType="application/xml"/>"#,
    r#"<Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>"#,
    r#"<Override PartName="/word/styles.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml"/>"#,
    r#"<Override PartName="/word/header2.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml"/>"#,
    "</Types>\n",
);
/// Body of `_rels/.rels`: a single relationship pointing at
/// `word/document.xml` as the office document.
const ROOT_RELS: &str = concat!(
    r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>"#,
    "\n",
    r#"<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">"#,
    r#"<Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="word/document.xml"/>"#,
    "</Relationships>\n",
);
/// Body of `word/_rels/document.xml.rels`: relationships from the main
/// document to its styles part (`rIdStyles`) and header part (`rIdHeader`).
const DOC_RELS: &str = concat!(
    r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>"#,
    "\n",
    r#"<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">"#,
    r#"<Relationship Id="rIdStyles" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles" Target="styles.xml"/>"#,
    r#"<Relationship Id="rIdHeader" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/header" Target="header2.xml"/>"#,
    "</Relationships>\n",
);
/// Escapes `s` for use as XML character data or inside an attribute value.
///
/// The five characters with predefined XML entities (`&`, `<`, `>`, `"`,
/// `'`) are replaced by their entity references. Characters that XML 1.0
/// does not allow in a document at all (C0 controls other than tab, line
/// feed and carriage return, plus U+FFFE and U+FFFF) are dropped, because
/// a part containing one of them is not well-formed XML. Everything else is
/// copied through unchanged.
fn xml_escape(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '"' => out.push_str("&quot;"),
            '\'' => out.push_str("&apos;"),
            // Outside the XML 1.0 `Char` production: cannot be represented,
            // not even as a numeric character reference, so drop it.
            '\u{0}'..='\u{8}'
            | '\u{B}'
            | '\u{C}'
            | '\u{E}'..='\u{1F}'
            | '\u{FFFE}'
            | '\u{FFFF}' => {}
            _ => out.push(c),
        }
    }
    out
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Read;

    /// Two-chapter fixture: chapter one contains a scene break, chapter two
    /// contains characters that must be XML-escaped.
    fn sample() -> (ManuscriptMeta, Vec<ManuscriptChapter>) {
        let meta = ManuscriptMeta {
            title: "The Harbor Code".into(),
            contact: "Jane Writer\n12 Wharf Rd\njane@example.com".into(),
            byline: "Jane Writer".into(),
            surname: "Writer".into(),
            word_count: 80_123,
        };
        let chapters = vec![
            ManuscriptChapter {
                title: "Chapter One".into(),
                paragraphs: vec![
                    "The harbor was quiet that morning.".into(),
                    "* * *".into(),
                    "By noon it was not.".into(),
                ],
            },
            ManuscriptChapter {
                title: "Chapter Two".into(),
                paragraphs: vec!["A new day, & a new <tide>.".into()],
            },
        ];
        (meta, chapters)
    }

    /// Reads the archive entry `name` out of a built package as UTF-8 text,
    /// panicking if the bytes are not a zip or the entry is absent.
    fn part(bytes: &[u8], name: &str) -> String {
        let mut zip = zip::ZipArchive::new(std::io::Cursor::new(bytes)).expect("valid zip");
        let mut f = zip
            .by_name(name)
            .unwrap_or_else(|_| panic!("missing part {name}"));
        let mut s = String::new();
        f.read_to_string(&mut s).unwrap();
        s
    }

    #[test]
    fn package_has_every_required_part() {
        let (m, c) = sample();
        let bytes = build_docx(&m, &c, DocxFont::TimesNewRoman).unwrap();
        for p in [
            "[Content_Types].xml",
            "_rels/.rels",
            "word/_rels/document.xml.rels",
            "word/styles.xml",
            "word/header2.xml",
            "word/document.xml",
        ] {
            // `part` panics when the entry is missing; the text is not needed.
            let _ = part(&bytes, p);
        }
    }

    #[test]
    fn styles_carry_font_and_double_spacing() {
        let (m, c) = sample();
        let times = build_docx(&m, &c, DocxFont::TimesNewRoman).unwrap();
        let s = part(&times, "word/styles.xml");
        assert!(s.contains("Times New Roman"));
        assert!(
            s.contains("w:line=\"480\""),
            "double spacing (480 = twice the single-spacing value 240)"
        );
        assert!(s.contains("w:sz w:val=\"24\""), "12 pt");
        let cour = build_docx(&m, &c, DocxFont::Courier).unwrap();
        assert!(part(&cour, "word/styles.xml").contains("Courier New"));
    }

    #[test]
    fn header_has_keyword_and_live_page_field() {
        let (m, c) = sample();
        let h = part(
            &build_docx(&m, &c, DocxFont::TimesNewRoman).unwrap(),
            "word/header2.xml",
        );
        assert!(h.contains("Writer / HARBOR / "), "running-header label");
        assert!(
            h.contains("instrText") && h.contains(" PAGE "),
            "live page field"
        );
    }

    #[test]
    fn document_has_titlepage_header_pagebreaks_and_scene_break() {
        let (m, c) = sample();
        let d = part(
            &build_docx(&m, &c, DocxFont::TimesNewRoman).unwrap(),
            "word/document.xml",
        );
        assert!(d.contains("<w:titlePg/>"), "title page suppresses p1 header");
        assert!(d.contains("rIdHeader"), "section references the header");
        assert!(
            d.contains("approx. 80000 words"),
            "rounded word count on title page"
        );
        assert_eq!(d.matches("<w:pageBreakBefore/>").count(), 2);
        assert!(d.contains("<w:t xml:space=\"preserve\">#</w:t>"));
        // The raw `&` and `<tide>` from the fixture must appear escaped.
        assert!(
            d.contains("&amp; a new &lt;tide&gt;"),
            "body text is XML-escaped"
        );
        assert!(quick_xml_well_formed(&d), "document.xml is well-formed");
    }

    /// Returns `true` when `xml` can be read to end-of-file by quick-xml
    /// without a reader error.
    fn quick_xml_well_formed(xml: &str) -> bool {
        use quick_xml::events::Event;
        use quick_xml::reader::Reader;
        let mut r = Reader::from_str(xml);
        loop {
            match r.read_event() {
                Ok(Event::Eof) => return true,
                Err(_) => return false,
                _ => {}
            }
        }
    }

    #[test]
    #[ignore = "writes a file for manual inspection"]
    fn emit_sample_docx_for_manual_word_check() {
        let (m, c) = sample();
        let bytes = build_docx(&m, &c, DocxFont::TimesNewRoman).unwrap();
        // Use the platform temp directory instead of a hard-coded `/tmp`.
        let path = std::env::temp_dir().join("inkhaven-shunn-sample.docx");
        std::fs::write(&path, &bytes).unwrap();
        println!(
            "wrote {} ({} bytes) — open in Word and check:",
            path.display(),
            bytes.len()
        );
        println!("  - page 1 (title page) has NO running header");
        println!("  - page 2+ header reads 'Writer / HARBOR / <n>' (live page #)");
        println!("  - body is double-spaced 12pt Times New Roman");
        println!("  - each chapter starts on a fresh page; scene break is a centred #");
    }
}