pub mod body;
pub mod drawing;
pub mod fonts;
pub mod notes;
pub mod numbering;
pub mod properties;
pub mod settings;
pub mod styles;
pub mod theme;
pub mod vml;
use std::collections::HashMap;
use crate::docx::error::{ParseError, Result};
use crate::docx::model::*;
use crate::docx::zip::{self, PackageContents, RelationshipType, Relationships};
pub fn parse(data: &[u8]) -> Result<Document> {
let mut package = PackageContents::from_bytes(data)?;
let pkg_rels_data = package.require_part("_rels/.rels")?;
let pkg_rels = Relationships::parse(pkg_rels_data)?;
let doc_rel = pkg_rels
.find_by_type(&RelationshipType::OfficeDocument)
.ok_or_else(|| ParseError::MissingPart("officeDocument relationship".into()))?;
let doc_path = zip::resolve_target("", &doc_rel.target);
let doc_dir = zip::part_directory(&doc_path);
let doc_rels_path = zip::rels_path_for(&doc_path);
let doc_rels = if let Some(data) = package.get_part(&doc_rels_path) {
Relationships::parse(data)?
} else {
Relationships::default()
};
let theme = if let Some(theme_rel) = doc_rels.find_by_type(&RelationshipType::Theme) {
let theme_path = zip::resolve_target(doc_dir, &theme_rel.target);
if let Some(data) = package.get_part(&theme_path) {
Some(theme::parse_theme(data)?)
} else {
None
}
} else {
None
};
let style_sheet = if let Some(styles_rel) = doc_rels.find_by_type(&RelationshipType::Styles) {
let styles_path = zip::resolve_target(doc_dir, &styles_rel.target);
let data = package.require_part(&styles_path)?;
styles::parse_styles(data)?
} else {
StyleSheet::default()
};
let numbering_defs = if let Some(num_rel) = doc_rels.find_by_type(&RelationshipType::Numbering)
{
let num_path = zip::resolve_target(doc_dir, &num_rel.target);
if let Some(data) = package.get_part(&num_path) {
numbering::parse_numbering(data)?
} else {
NumberingDefinitions::default()
}
} else {
NumberingDefinitions::default()
};
let doc_settings =
if let Some(settings_rel) = doc_rels.find_by_type(&RelationshipType::Settings) {
let settings_path = zip::resolve_target(doc_dir, &settings_rel.target);
if let Some(data) = package.get_part(&settings_path) {
settings::parse_settings(data)?
} else {
DocumentSettings::default()
}
} else {
DocumentSettings::default()
};
let mut media = HashMap::new();
for rel in doc_rels.filter_by_type(&RelationshipType::Image) {
let media_path = zip::resolve_target(doc_dir, &rel.target);
if let Some(data) = package.take_part(&media_path) {
let fmt = ImageFormat::detect(&rel.target, &data);
media.insert(rel.id.clone(), (data, fmt));
}
}
if let Some(num_rel) = doc_rels.find_by_type(&RelationshipType::Numbering) {
let num_path = zip::resolve_target(doc_dir, &num_rel.target);
let num_dir = zip::part_directory(&num_path);
let num_rels_path = zip::rels_path_for(&num_path);
if let Some(rels_data) = package.get_part(&num_rels_path) {
let num_rels = Relationships::parse(rels_data)?;
for rel in num_rels.filter_by_type(&RelationshipType::Image) {
let img_path = zip::resolve_target(num_dir, &rel.target);
if let Some(data) = package.take_part(&img_path) {
let fmt = ImageFormat::detect(&rel.target, &data);
media.insert(rel.id.clone(), (data, fmt));
}
}
}
}
let embedded_fonts = if let Some(ft_rel) = doc_rels.find_by_type(&RelationshipType::FontTable) {
let ft_path = zip::resolve_target(doc_dir, &ft_rel.target);
let ft_dir = zip::part_directory(&ft_path);
let ft_rels_path = zip::rels_path_for(&ft_path);
let ft_data = package.take_part(&ft_path);
let ft_rels_data = package.take_part(&ft_rels_path);
if let Some(ft_data) = ft_data {
let ft_rels = if let Some(rd) = ft_rels_data {
Relationships::parse(&rd)?
} else {
Relationships::default()
};
fonts::parse_embedded_fonts(&ft_data, &ft_rels, &mut package, ft_dir)?
} else {
Vec::new()
}
} else {
Vec::new()
};
let doc_data = package.require_part(&doc_path)?;
let (mut body_blocks, final_section) = body::parse_body(doc_data)?;
let mut headers = HashMap::new();
let mut footers = HashMap::new();
for rel in doc_rels.filter_by_type(&RelationshipType::Header) {
let path = zip::resolve_target(doc_dir, &rel.target);
if let Some(data) = package.get_part(&path) {
let blocks = body::parse_blocks(data)?;
headers.insert(rel.id.clone(), blocks);
let hf_rels_path = zip::rels_path_for(&path);
if let Some(rd) = package.get_part(&hf_rels_path) {
let hf_rels = Relationships::parse(rd)?;
for img_rel in hf_rels.filter_by_type(&RelationshipType::Image) {
let img_path = zip::resolve_target(zip::part_directory(&path), &img_rel.target);
if let Some(img_data) = package.take_part(&img_path) {
let fmt = ImageFormat::detect(&img_rel.target, &img_data);
media.insert(img_rel.id.clone(), (img_data, fmt));
}
}
}
}
}
for rel in doc_rels.filter_by_type(&RelationshipType::Footer) {
let path = zip::resolve_target(doc_dir, &rel.target);
if let Some(data) = package.get_part(&path) {
let blocks = body::parse_blocks(data)?;
footers.insert(rel.id.clone(), blocks);
let hf_rels_path = zip::rels_path_for(&path);
if let Some(rd) = package.get_part(&hf_rels_path) {
let hf_rels = Relationships::parse(rd)?;
for img_rel in hf_rels.filter_by_type(&RelationshipType::Image) {
let img_path = zip::resolve_target(zip::part_directory(&path), &img_rel.target);
if let Some(img_data) = package.take_part(&img_path) {
let fmt = ImageFormat::detect(&img_rel.target, &img_data);
media.insert(img_rel.id.clone(), (img_data, fmt));
}
}
}
}
}
let mut footnotes = if let Some(fn_rel) = doc_rels.find_by_type(&RelationshipType::Footnotes) {
let path = zip::resolve_target(doc_dir, &fn_rel.target);
if let Some(data) = package.get_part(&path) {
notes::parse_notes(data, "footnote")?
} else {
HashMap::new()
}
} else {
HashMap::new()
};
let mut endnotes = if let Some(en_rel) = doc_rels.find_by_type(&RelationshipType::Endnotes) {
let path = zip::resolve_target(doc_dir, &en_rel.target);
if let Some(data) = package.get_part(&path) {
notes::parse_notes(data, "endnote")?
} else {
HashMap::new()
}
} else {
HashMap::new()
};
resolve_hyperlinks(&mut body_blocks, &doc_rels);
for blocks in headers.values_mut() {
resolve_hyperlinks(blocks, &doc_rels);
}
for blocks in footers.values_mut() {
resolve_hyperlinks(blocks, &doc_rels);
}
for blocks in footnotes.values_mut() {
resolve_hyperlinks(blocks, &doc_rels);
}
for blocks in endnotes.values_mut() {
resolve_hyperlinks(blocks, &doc_rels);
}
Ok(Document {
settings: doc_settings,
theme,
styles: style_sheet,
numbering: numbering_defs,
body: body_blocks,
final_section,
headers,
footers,
footnotes,
endnotes,
media,
embedded_fonts,
})
}
fn resolve_hyperlinks(blocks: &mut [Block], rels: &crate::docx::zip::Relationships) {
for block in blocks {
match block {
Block::Paragraph(p) => resolve_hyperlinks_in_inlines(&mut p.content, rels),
Block::Table(t) => {
for row in &mut t.rows {
for cell in &mut row.cells {
resolve_hyperlinks(&mut cell.content, rels);
}
}
}
_ => {}
}
}
}
fn resolve_hyperlinks_in_inlines(
inlines: &mut [crate::model::Inline],
rels: &crate::docx::zip::Relationships,
) {
use crate::model::{HyperlinkTarget, Inline, RelId};
for inline in inlines {
match inline {
Inline::Hyperlink(link) => {
if let HyperlinkTarget::External(ref rel_id) = link.target {
if let Some(rel) = rels.find_by_id(rel_id.as_str()) {
link.target = HyperlinkTarget::External(RelId::new(&rel.target));
}
}
resolve_hyperlinks_in_inlines(&mut link.content, rels);
}
Inline::Field(field) => {
resolve_hyperlinks_in_inlines(&mut field.content, rels);
}
Inline::AlternateContent(ac) => {
if let Some(ref mut fallback) = ac.fallback {
resolve_hyperlinks_in_inlines(fallback, rels);
}
}
_ => {}
}
}
}