pub mod body;
pub mod body_schema;
pub mod drawing;
pub mod fonts;
pub mod notes;
pub mod numbering;
pub mod primitives;
pub mod properties;
pub mod rel_rewrite;
pub mod serde_xml;
pub mod settings;
pub mod styles;
pub mod theme;
pub mod vml;
use std::collections::HashMap;
use crate::docx::error::{ParseError, Result};
use crate::docx::model::*;
use crate::docx::relationships::{RelationshipType, Relationships};
use crate::docx::zip::{self, PackageContents};
/// Parses an in-memory DOCX package into a [`Document`].
///
/// The main document part is located through the package-level
/// `_rels/.rels` relationships. Its own relationships are then used to find
/// the theme, styles, numbering, settings, images, font table, headers,
/// footers, footnotes and endnotes.
///
/// A part that is looked up with `get_part`/`take_part` and is absent is
/// treated as "not present": the corresponding value falls back to its
/// default, `None`, or an empty collection.
///
/// # Errors
///
/// * Any error returned by `PackageContents::from_bytes`.
/// * Any error from the `require_part` lookups (`_rels/.rels`, the main
///   document part, and the styles part when a styles relationship exists).
/// * [`ParseError::MissingPart`] when the package relationships contain no
///   `OfficeDocument` relationship.
/// * Any error propagated from the individual part parsers.
pub fn parse(data: &[u8]) -> Result<Document> {
    let mut package = PackageContents::from_bytes(data)?;

    // Package-level relationships point at the main document part.
    let root_rels = Relationships::parse(package.require_part("_rels/.rels")?)?;
    let main_rel = root_rels
        .find_by_type(&RelationshipType::OfficeDocument)
        .ok_or_else(|| ParseError::MissingPart("officeDocument relationship".into()))?;
    let doc_path = zip::resolve_target("", &main_rel.target);
    let doc_dir = zip::part_directory(&doc_path);

    // Relationships of the main document part; a missing rels part means none.
    let doc_rels_path = zip::rels_path_for(&doc_path);
    let doc_rels = match package.get_part(&doc_rels_path) {
        Some(raw) => Relationships::parse(raw)?,
        None => Relationships::default(),
    };

    let theme = match doc_rels.find_by_type(&RelationshipType::Theme) {
        Some(rel) => {
            let part_path = zip::resolve_target(doc_dir, &rel.target);
            match package.get_part(&part_path) {
                Some(raw) => Some(theme::parse_theme(raw)?),
                None => None,
            }
        }
        None => None,
    };

    // Unlike the other optional parts, a declared styles part is fetched with
    // `require_part`, so its lookup error is propagated.
    let style_sheet = match doc_rels.find_by_type(&RelationshipType::Styles) {
        Some(rel) => {
            let part_path = zip::resolve_target(doc_dir, &rel.target);
            styles::parse_styles(package.require_part(&part_path)?)?
        }
        None => StyleSheet::default(),
    };

    let numbering_defs = match doc_rels.find_by_type(&RelationshipType::Numbering) {
        Some(rel) => {
            let part_path = zip::resolve_target(doc_dir, &rel.target);
            match package.get_part(&part_path) {
                Some(raw) => numbering::parse_numbering(raw)?,
                None => NumberingDefinitions::default(),
            }
        }
        None => NumberingDefinitions::default(),
    };

    let doc_settings = match doc_rels.find_by_type(&RelationshipType::Settings) {
        Some(rel) => {
            let part_path = zip::resolve_target(doc_dir, &rel.target);
            match package.get_part(&part_path) {
                Some(raw) => settings::parse_settings(raw)?,
                None => DocumentSettings::default(),
            }
        }
        None => DocumentSettings::default(),
    };

    // Images referenced directly by the main document, keyed by relationship id.
    // NOTE(review): `take_part` hands over the bytes, so a second relationship
    // that resolves to the same target presumably finds nothing — confirm.
    let mut media = HashMap::new();
    for image_rel in doc_rels.filter_by_type(&RelationshipType::Image) {
        let image_path = zip::resolve_target(doc_dir, &image_rel.target);
        let Some(bytes) = package.take_part(&image_path) else {
            continue;
        };
        let format = ImageFormat::detect(&image_rel.target, &bytes);
        media.insert(image_rel.id.clone(), (bytes, format));
    }

    // Images referenced from the numbering part's own relationships.
    // NOTE(review): these are stored under the numbering part's raw
    // relationship ids, in the same map as the document-level ids above; an
    // equal id would replace the earlier entry — confirm ids cannot clash.
    if let Some(numbering_rel) = doc_rels.find_by_type(&RelationshipType::Numbering) {
        let numbering_path = zip::resolve_target(doc_dir, &numbering_rel.target);
        let numbering_dir = zip::part_directory(&numbering_path);
        let numbering_rels_path = zip::rels_path_for(&numbering_path);
        if let Some(raw) = package.get_part(&numbering_rels_path) {
            let numbering_rels = Relationships::parse(raw)?;
            for image_rel in numbering_rels.filter_by_type(&RelationshipType::Image) {
                let image_path = zip::resolve_target(numbering_dir, &image_rel.target);
                let Some(bytes) = package.take_part(&image_path) else {
                    continue;
                };
                let format = ImageFormat::detect(&image_rel.target, &bytes);
                media.insert(image_rel.id.clone(), (bytes, format));
            }
        }
    }

    let embedded_fonts = match doc_rels.find_by_type(&RelationshipType::FontTable) {
        Some(font_rel) => {
            let table_path = zip::resolve_target(doc_dir, &font_rel.target);
            let table_dir = zip::part_directory(&table_path);
            let table_rels_path = zip::rels_path_for(&table_path);
            // Both parts are taken up front, before checking whether the
            // font table itself exists.
            let table_bytes = package.take_part(&table_path);
            let table_rels_bytes = package.take_part(&table_rels_path);
            match table_bytes {
                Some(table_bytes) => {
                    let table_rels = match table_rels_bytes {
                        Some(raw) => Relationships::parse(&raw)?,
                        None => Relationships::default(),
                    };
                    fonts::parse_embedded_fonts(
                        &table_bytes,
                        &table_rels,
                        &mut package,
                        table_dir,
                    )?
                }
                None => Vec::new(),
            }
        }
        None => Vec::new(),
    };

    let (mut body_blocks, final_section) = body::parse_body(package.require_part(&doc_path)?)?;

    // Headers and footers are keyed by the document-level relationship id.
    // References inside each part are rewritten through that part's own
    // relationship remap.
    let mut headers = HashMap::new();
    for header_rel in doc_rels.filter_by_type(&RelationshipType::Header) {
        let part_path = zip::resolve_target(doc_dir, &header_rel.target);
        let Some(raw) = package.get_part(&part_path) else {
            continue;
        };
        let mut blocks = body::parse_blocks(raw)?;
        let remap = load_part_rel_remap(&part_path, &mut package, &mut media)?;
        rel_rewrite::rewrite_part_rels_in_blocks(&mut blocks, &remap);
        headers.insert(header_rel.id.clone(), blocks);
    }

    let mut footers = HashMap::new();
    for footer_rel in doc_rels.filter_by_type(&RelationshipType::Footer) {
        let part_path = zip::resolve_target(doc_dir, &footer_rel.target);
        let Some(raw) = package.get_part(&part_path) else {
            continue;
        };
        let mut blocks = body::parse_blocks(raw)?;
        let remap = load_part_rel_remap(&part_path, &mut package, &mut media)?;
        rel_rewrite::rewrite_part_rels_in_blocks(&mut blocks, &remap);
        footers.insert(footer_rel.id.clone(), blocks);
    }

    let footnotes = match doc_rels.find_by_type(&RelationshipType::Footnotes) {
        Some(notes_rel) => {
            let part_path = zip::resolve_target(doc_dir, &notes_rel.target);
            match package.get_part(&part_path) {
                Some(raw) => {
                    let mut parsed = notes::parse_notes(raw, "footnote")?;
                    let remap = load_part_rel_remap(&part_path, &mut package, &mut media)?;
                    for blocks in parsed.values_mut() {
                        rel_rewrite::rewrite_part_rels_in_blocks(blocks, &remap);
                    }
                    parsed
                }
                None => HashMap::new(),
            }
        }
        None => HashMap::new(),
    };

    let endnotes = match doc_rels.find_by_type(&RelationshipType::Endnotes) {
        Some(notes_rel) => {
            let part_path = zip::resolve_target(doc_dir, &notes_rel.target);
            match package.get_part(&part_path) {
                Some(raw) => {
                    let mut parsed = notes::parse_notes(raw, "endnote")?;
                    let remap = load_part_rel_remap(&part_path, &mut package, &mut media)?;
                    for blocks in parsed.values_mut() {
                        rel_rewrite::rewrite_part_rels_in_blocks(blocks, &remap);
                    }
                    parsed
                }
                None => HashMap::new(),
            }
        }
        None => HashMap::new(),
    };

    // Body hyperlinks are resolved against the document-level relationships.
    resolve_hyperlinks(&mut body_blocks, &doc_rels);

    Ok(Document {
        settings: doc_settings,
        theme,
        styles: style_sheet,
        numbering: numbering_defs,
        body: body_blocks,
        final_section,
        headers,
        footers,
        footnotes,
        endnotes,
        media,
        embedded_fonts,
    })
}
/// Builds the relationship-id remap for a single part (e.g. a header or the
/// footnotes part) from that part's own `.rels` file.
///
/// * Each image relationship whose target exists in `package` is copied into
///   `media` under the key `"<part_path>::<rel id>"`, and the remap sends the
///   part-local id to that key.
/// * Each hyperlink relationship is mapped from its id to a [`RelId`] that
///   wraps the relationship target string.
///
/// Returns an empty map when the part has no `.rels` file.
///
/// # Errors
///
/// Propagates any error from parsing the part's relationships.
fn load_part_rel_remap(
    part_path: &str,
    package: &mut PackageContents,
    media: &mut HashMap<RelId, (Vec<u8>, ImageFormat)>,
) -> Result<HashMap<RelId, RelId>> {
    let mut remap: HashMap<RelId, RelId> = HashMap::new();

    let rels_path = zip::rels_path_for(part_path);
    let part_rels = match package.get_part(&rels_path) {
        Some(raw) => Relationships::parse(raw)?,
        None => return Ok(remap),
    };

    for image_rel in part_rels.filter_by_type(&RelationshipType::Image) {
        let image_path = zip::resolve_target(zip::part_directory(part_path), &image_rel.target);
        // The bytes are copied rather than taken, so the part stays in the package.
        let Some(stored) = package.get_part(&image_path) else {
            continue;
        };
        let image_bytes = stored.to_vec();
        let format = ImageFormat::detect(&image_rel.target, &image_bytes);
        let scoped_id = RelId::new(format!("{}::{}", part_path, image_rel.id.as_str()));
        media.insert(scoped_id.clone(), (image_bytes, format));
        remap.insert(image_rel.id.clone(), scoped_id);
    }

    // Hyperlinks are inserted after images, so a hyperlink entry wins if an
    // id appears in both.
    for hyperlink_rel in part_rels.filter_by_type(&RelationshipType::Hyperlink) {
        let resolved = RelId::new(hyperlink_rel.target.clone());
        remap.insert(hyperlink_rel.id.clone(), resolved);
    }

    Ok(remap)
}
/// Resolves external hyperlink targets in `blocks` against `rels`.
///
/// Paragraphs have their inline content processed directly. Tables are
/// walked cell by cell and each cell's blocks are processed recursively.
/// All other block kinds are left untouched.
fn resolve_hyperlinks(blocks: &mut [Block], rels: &crate::docx::relationships::Relationships) {
    for block in blocks.iter_mut() {
        match block {
            Block::Paragraph(paragraph) => {
                resolve_hyperlinks_in_inlines(&mut paragraph.content, rels);
            }
            Block::Table(table) => {
                table
                    .rows
                    .iter_mut()
                    .flat_map(|row| row.cells.iter_mut())
                    .for_each(|cell| resolve_hyperlinks(&mut cell.content, rels));
            }
            _ => {}
        }
    }
}
/// Resolves external hyperlink targets within a run of inline elements.
///
/// For a hyperlink whose target is `HyperlinkTarget::External(id)`, the id is
/// looked up in `rels`. When a relationship is found, the target is replaced
/// with one that wraps the relationship's target string; otherwise it is left
/// as is. The function then recurses into hyperlink content, field content
/// and the fallback of alternate content.
fn resolve_hyperlinks_in_inlines(
    inlines: &mut [crate::model::Inline],
    rels: &crate::docx::relationships::Relationships,
) {
    use crate::model::{HyperlinkTarget, Inline, RelId};

    for inline in inlines.iter_mut() {
        match inline {
            Inline::Hyperlink(link) => {
                // Look the relationship up first, then overwrite the target.
                let matched_rel = match &link.target {
                    HyperlinkTarget::External(rel_id) => rels.find_by_id(rel_id.as_str()),
                    _ => None,
                };
                if let Some(rel) = matched_rel {
                    link.target = HyperlinkTarget::External(RelId::new(&rel.target));
                }
                resolve_hyperlinks_in_inlines(&mut link.content, rels);
            }
            Inline::Field(field) => resolve_hyperlinks_in_inlines(&mut field.content, rels),
            Inline::AlternateContent(alternate) => {
                if let Some(fallback) = &mut alternate.fallback {
                    resolve_hyperlinks_in_inlines(fallback, rels);
                }
            }
            _ => {}
        }
    }
}