use std::collections::HashMap;
use std::io::Read;
use quick_xml::events::Event;
use quick_xml::Reader;
use log::warn;
use crate::docx::error::{ParseError, Result};
use crate::docx::model::RelId;
use crate::docx::xml;
/// In-memory contents of a ZIP-based package, one byte buffer per part.
pub struct PackageContents {
    /// Raw bytes of every archive entry, keyed by the entry's normalized
    /// path (leading slashes removed, lowercased).
    pub parts: HashMap<String, Vec<u8>>,
}
impl PackageContents {
    /// Reads every entry of the ZIP archive in `data` into memory.
    ///
    /// Entry names are passed through `normalize_path` before being used as
    /// map keys, so two entries whose names differ only by case or leading
    /// slashes end up under the same key (the later entry wins).
    ///
    /// # Errors
    ///
    /// Propagates any error from opening the archive, locating an entry, or
    /// decompressing its contents.
    pub fn from_bytes(data: &[u8]) -> Result<Self> {
        // The uncompressed size recorded in the archive is attacker-controlled
        // metadata. It is only used as a pre-allocation hint, so clamp it: a
        // tiny archive that merely *claims* a gigantic entry must not be able
        // to force a huge up-front allocation. `read_to_end` still grows the
        // buffer as needed for entries that really are larger than the clamp.
        const MAX_PREALLOC_BYTES: u64 = 8 * 1024 * 1024;

        let cursor = std::io::Cursor::new(data);
        let mut archive = zip::ZipArchive::new(cursor)?;
        let mut parts = HashMap::with_capacity(archive.len());
        for i in 0..archive.len() {
            let mut file = archive.by_index(i)?;
            let name = normalize_path(file.name());
            // After clamping the value fits comfortably in `usize`, so the
            // cast cannot truncate (unlike casting the raw declared size).
            let capacity = file.size().min(MAX_PREALLOC_BYTES) as usize;
            let mut buf = Vec::with_capacity(capacity);
            file.read_to_end(&mut buf)?;
            parts.insert(name, buf);
        }
        Ok(Self { parts })
    }

    /// Returns the bytes of the part stored under `path`, if present.
    ///
    /// `path` is normalized the same way as the keys, so lookups ignore case
    /// and leading slashes.
    pub fn get_part(&self, path: &str) -> Option<&[u8]> {
        let normalized = normalize_path(path);
        self.parts.get(&normalized).map(|bytes| bytes.as_slice())
    }

    /// Like [`get_part`](Self::get_part), but a missing part is an error.
    ///
    /// # Errors
    ///
    /// Returns `ParseError::MissingPart` carrying the path exactly as the
    /// caller supplied it (not the normalized form).
    pub fn require_part(&self, path: &str) -> Result<&[u8]> {
        self.get_part(path)
            .ok_or_else(|| ParseError::MissingPart(path.to_string()))
    }

    /// Removes the part stored under `path` and returns its bytes, or `None`
    /// if no such part exists.
    pub fn take_part(&mut self, path: &str) -> Option<Vec<u8>> {
        let normalized = normalize_path(path);
        self.parts.remove(&normalized)
    }
}
/// Canonical form of a part path used for map keys and lookups: every
/// leading `/` is removed and the remainder is lowercased.
fn normalize_path(path: &str) -> String {
    let without_root = path.trim_start_matches('/');
    without_root.to_lowercase()
}
/// A single `Relationship` element read from a relationships part.
#[derive(Debug, Clone)]
pub struct Relationship {
    /// Identifier taken from the element's `Id` attribute.
    pub id: RelId,
    /// Classification of the element's `Type` attribute URI.
    pub rel_type: RelationshipType,
    /// Value of the `Target` attribute, stored as read (not resolved).
    pub target: String,
    /// Whether the target is inside the package or external to it.
    pub target_mode: TargetMode,
}
/// Kind of a relationship, derived from the trailing path segment of its
/// type URI.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RelationshipType {
    /// URI ends in `/officeDocument` or `/document`.
    OfficeDocument,
    /// URI ends in `/styles`.
    Styles,
    /// URI ends in `/numbering`.
    Numbering,
    /// URI ends in `/settings`.
    Settings,
    /// URI ends in `/fontTable`.
    FontTable,
    /// URI ends in `/theme`.
    Theme,
    /// URI ends in `/header`.
    Header,
    /// URI ends in `/footer`.
    Footer,
    /// URI ends in `/footnotes`.
    Footnotes,
    /// URI ends in `/endnotes`.
    Endnotes,
    /// URI ends in `/font`.
    Font,
    /// URI ends in `/image`.
    Image,
    /// URI ends in `/hyperlink`.
    Hyperlink,
    /// URI ends in `/comments`.
    Comments,
    /// URI ends in `/core-properties`.
    CoreProperties,
    /// URI ends in `/extended-properties`.
    ExtendedProperties,
    /// URI ends in `/custom-properties`.
    CustomProperties,
    /// URI ends in `/customXml`.
    CustomXml,
    /// URI ends in `/webSettings`.
    WebSettings,
    /// URI ends in `/stylesWithEffects`.
    StylesWithEffects,
    /// URI ends in `/glossaryDocument`.
    GlossaryDocument,
    /// Any other URI; the payload is the full, unmodified URI string.
    Unknown(String),
}
impl RelationshipType {
    /// Maps a relationship type URI to a variant by looking only at the text
    /// after the final `/`.
    ///
    /// A URI with no `/`, or with an unrecognized final segment, is logged
    /// with `warn!` and returned as [`RelationshipType::Unknown`] carrying
    /// the whole URI.
    fn from_uri(uri: &str) -> Self {
        // Every recognized suffix is a single path segment, so comparing the
        // last segment is the same as testing `ends_with("/<segment>")`.
        // (`.../metadata/core-properties` is covered by `core-properties`.)
        let last_segment = uri.rsplit_once('/').map(|(_, tail)| tail);
        match last_segment {
            Some("officeDocument") | Some("document") => Self::OfficeDocument,
            Some("styles") => Self::Styles,
            Some("numbering") => Self::Numbering,
            Some("settings") => Self::Settings,
            Some("fontTable") => Self::FontTable,
            Some("theme") => Self::Theme,
            Some("header") => Self::Header,
            Some("footer") => Self::Footer,
            Some("footnotes") => Self::Footnotes,
            Some("endnotes") => Self::Endnotes,
            Some("font") => Self::Font,
            Some("image") => Self::Image,
            Some("hyperlink") => Self::Hyperlink,
            Some("comments") => Self::Comments,
            Some("core-properties") => Self::CoreProperties,
            Some("extended-properties") => Self::ExtendedProperties,
            Some("custom-properties") => Self::CustomProperties,
            Some("customXml") => Self::CustomXml,
            Some("webSettings") => Self::WebSettings,
            Some("stylesWithEffects") => Self::StylesWithEffects,
            Some("glossaryDocument") => Self::GlossaryDocument,
            _ => {
                warn!("unknown relationship type: {}", uri);
                Self::Unknown(uri.to_string())
            }
        }
    }
}
/// Where a relationship target lives relative to the package.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum TargetMode {
    /// Target is a part inside the package. This is the default.
    #[default]
    Internal,
    /// Target is outside the package.
    External,
}
/// The relationships read from one relationships part, in document order.
#[derive(Debug, Default, Clone)]
pub struct Relationships {
    /// Parsed entries, in the order their elements were encountered.
    pub(crate) rels: Vec<Relationship>,
}
impl Relationships {
    /// Parses the XML of a relationships part.
    ///
    /// Every start or empty element whose local name is `Relationship`
    /// contributes one entry; all other events are ignored. `Id`, `Type` and
    /// `Target` are required attributes. `TargetMode` is optional and only
    /// the value `external` (ASCII case-insensitive) selects
    /// [`TargetMode::External`]; anything else, or its absence, means
    /// internal.
    ///
    /// # Errors
    ///
    /// Propagates errors from reading XML events and from attribute lookup
    /// (including a missing required attribute).
    pub fn parse(data: &[u8]) -> Result<Self> {
        let mut reader = Reader::from_reader(data);
        reader.config_mut().trim_text(true);

        let mut event_buf = Vec::new();
        let mut parsed = Vec::new();
        loop {
            let event = xml::next_event(&mut reader, &mut event_buf)?;
            let element = match event {
                Event::Eof => break,
                Event::Empty(ref e) | Event::Start(ref e) => e,
                _ => continue,
            };
            if xml::local_name(element.name().as_ref()) != b"Relationship" {
                continue;
            }

            // Attribute order is kept as Id, Type, Target, TargetMode so the
            // first failing lookup is the one reported.
            let id = RelId::new(xml::required_attr(element, b"Id")?);
            let type_uri = xml::required_attr(element, b"Type")?;
            let target = xml::required_attr(element, b"Target")?;
            let mode_attr = xml::optional_attr(element, b"TargetMode")?;
            let is_external = matches!(
                mode_attr,
                Some(ref mode) if mode.eq_ignore_ascii_case("external")
            );
            let target_mode = if is_external {
                TargetMode::External
            } else {
                TargetMode::Internal
            };

            parsed.push(Relationship {
                id,
                rel_type: RelationshipType::from_uri(&type_uri),
                target,
                target_mode,
            });
        }
        Ok(Self { rels: parsed })
    }

    /// Returns the first relationship whose type equals `rel_type`.
    pub fn find_by_type(&self, rel_type: &RelationshipType) -> Option<&Relationship> {
        for rel in &self.rels {
            if rel.rel_type == *rel_type {
                return Some(rel);
            }
        }
        None
    }

    /// Returns every relationship whose type equals `rel_type`, preserving
    /// their original order.
    pub fn filter_by_type(&self, rel_type: &RelationshipType) -> Vec<&Relationship> {
        let mut matching = Vec::new();
        for rel in &self.rels {
            if rel.rel_type == *rel_type {
                matching.push(rel);
            }
        }
        matching
    }

    /// Returns the first relationship whose id string equals `id`.
    pub fn find_by_id(&self, id: &str) -> Option<&Relationship> {
        for rel in &self.rels {
            if rel.id.as_str() == id {
                return Some(rel);
            }
        }
        None
    }

    /// Returns all relationships as a slice.
    pub fn all(&self) -> &[Relationship] {
        self.rels.as_slice()
    }
}
pub fn resolve_target(base_dir: &str, target: &str) -> String {
if target.starts_with('/') {
normalize_path(target)
} else {
let mut path = if base_dir.is_empty() {
target.to_string()
} else {
format!("{}/{}", base_dir, target)
};
while let Some(pos) = path.find("/../") {
if let Some(parent_start) = path[..pos].rfind('/') {
path = format!("{}{}", &path[..parent_start], &path[pos + 3..]);
} else {
path = path[pos + 4..].to_string();
}
}
normalize_path(&path)
}
}
/// Builds the path of the relationships part that belongs to `part_path`.
///
/// The path is normalized first; then `_rels/` is inserted before the file
/// name and `.rels` is appended. A path without any `/` (including the empty
/// string, which yields `_rels/.rels`) is placed directly under `_rels/`.
pub fn rels_path_for(part_path: &str) -> String {
    let normalized = normalize_path(part_path);
    match normalized.rsplit_once('/') {
        Some((dir, file_name)) => format!("{}/_rels/{}.rels", dir, file_name),
        None => format!("_rels/{}.rels", normalized),
    }
}
/// Returns everything before the last `/` in `part_path`, or the empty
/// string when the path contains no `/`.
///
/// The input is used as given; no normalization is applied.
pub fn part_directory(part_path: &str) -> &str {
    part_path.rsplit_once('/').map_or("", |(dir, _file)| dir)
}