use crate::ooxml::opc::error::{OpcError, Result};
use crate::ooxml::opc::packuri::PackURI;
use crate::ooxml::opc::rel::Relationships;
use memchr::memmem;
use quick_xml::events::Event;
use quick_xml::Reader;
use std::collections::HashMap;
use std::sync::Arc;
/// Behaviour shared by every part type in this module.
///
/// Implementors provide the five accessors; the relationship helpers are
/// default methods built on top of them.
pub trait Part {
    /// Pack URI identifying this part.
    fn partname(&self) -> &PackURI;

    /// Content type string associated with this part.
    fn content_type(&self) -> &str;

    /// The part's content as bytes.
    fn blob(&self) -> &[u8];

    /// Shared access to the relationship collection held by this part.
    fn rels(&self) -> &Relationships;

    /// Exclusive access to the relationship collection held by this part.
    fn rels_mut(&mut self) -> &mut Relationships;

    /// Asks the relationship collection to `get_or_add` a relationship of
    /// type `reltype` to `target_partname` and returns its rId as an owned
    /// string.
    fn relate_to(&mut self, target_partname: &str, reltype: &str) -> String {
        self.rels_mut()
            .get_or_add(reltype, target_partname)
            .r_id()
            .to_string()
    }

    /// Same idea as [`Part::relate_to`] but for an external URL; forwards to
    /// `Relationships::get_or_add_ext_rel` and returns what it returns.
    fn relate_to_ext(&mut self, target_url: &str, reltype: &str) -> String {
        let relationships = self.rels_mut();
        relationships.get_or_add_ext_rel(reltype, target_url)
    }

    /// Looks up the relationship with id `r_id` and returns its target
    /// reference.
    ///
    /// # Errors
    ///
    /// Returns `OpcError::RelationshipNotFound` when no relationship with
    /// that id exists.
    fn target_ref(&self, r_id: &str) -> Result<&str> {
        match self.rels().get(r_id) {
            Some(relationship) => Ok(relationship.target_ref()),
            None => Err(OpcError::RelationshipNotFound(format!("rId: {}", r_id))),
        }
    }

    /// Counts how many times the byte sequence `r:id="<r_id>"` occurs in the
    /// part's blob.
    ///
    /// This is a plain substring search over the raw bytes, not an XML-aware
    /// attribute lookup.
    fn rel_ref_count(&self, r_id: &str) -> usize {
        let needle = format!(r#"r:id="{}""#, r_id);
        memmem::find_iter(self.blob(), needle.as_bytes()).count()
    }
}
/// A package part whose content is held as raw bytes.
///
/// `PartFactory::load` falls back to this type when the content type is not
/// recognised as XML.
#[derive(Debug)]
pub struct BlobPart {
/// Pack URI identifying this part.
partname: PackURI,
/// Content type string supplied at construction.
content_type: String,
/// The part's bytes, stored behind an `Arc`.
blob: Arc<Vec<u8>>,
/// Relationship collection for this part; `BlobPart::new` creates it from
/// the part name's base URI.
rels: Relationships,
}
impl BlobPart {
pub fn new(partname: PackURI, content_type: String, blob: Vec<u8>) -> Self {
let rels = Relationships::new(partname.base_uri().to_string());
Self {
partname,
content_type,
blob: Arc::new(blob),
rels,
}
}
pub fn load(partname: PackURI, content_type: String, blob: Vec<u8>) -> Self {
Self::new(partname, content_type, blob)
}
}
/// `Part` accessors for [`BlobPart`]; each one exposes the matching field.
impl Part for BlobPart {
    /// Pack URI stored in this part.
    fn partname(&self) -> &PackURI {
        let Self { partname, .. } = self;
        partname
    }

    /// Content type string stored in this part.
    fn content_type(&self) -> &str {
        self.content_type.as_str()
    }

    /// The stored bytes, viewed as a slice.
    fn blob(&self) -> &[u8] {
        self.blob.as_slice()
    }

    /// Shared access to the stored relationship collection.
    fn rels(&self) -> &Relationships {
        let Self { rels, .. } = self;
        rels
    }

    /// Exclusive access to the stored relationship collection.
    fn rels_mut(&mut self) -> &mut Relationships {
        let Self { rels, .. } = self;
        rels
    }
}
/// A package part whose content is XML, kept as bytes and parsed on demand.
#[derive(Debug)]
pub struct XmlPart {
/// Pack URI identifying this part.
partname: PackURI,
/// Content type string supplied at construction.
content_type: String,
/// The XML document bytes, stored behind an `Arc`.
xml_bytes: Arc<Vec<u8>>,
/// Relationship collection for this part; `XmlPart::new` creates it from
/// the part name's base URI.
rels: Relationships,
/// Text previously found by `extract_text`, keyed by the element name that
/// was searched for.
element_cache: HashMap<String, String>,
}
impl XmlPart {
    /// Builds a part around `xml_bytes` without inspecting them.
    ///
    /// The relationship collection is created from the string form of the
    /// part name's base URI and the text cache starts out empty. Use
    /// [`XmlPart::load`] when the bytes should be checked for valid UTF-8.
    pub fn new(partname: PackURI, content_type: String, xml_bytes: Vec<u8>) -> Self {
        let rels = Relationships::new(partname.base_uri().to_string());
        Self {
            partname,
            content_type,
            xml_bytes: Arc::new(xml_bytes),
            rels,
            element_cache: HashMap::new(),
        }
    }

    /// Builds a part like [`XmlPart::new`], but first verifies that
    /// `xml_bytes` is valid UTF-8.
    ///
    /// # Errors
    ///
    /// Returns `OpcError::XmlError` when the bytes are not valid UTF-8.
    pub fn load(partname: PackURI, content_type: String, xml_bytes: Vec<u8>) -> Result<Self> {
        std::str::from_utf8(&xml_bytes)
            .map_err(|e| OpcError::XmlError(format!("Invalid UTF-8 in XML: {}", e)))?;
        Ok(Self::new(partname, content_type, xml_bytes))
    }

    /// Returns a fresh `quick_xml` reader positioned at the start of the
    /// document, configured with `trim_text(true)`.
    pub fn reader(&self) -> Reader<&[u8]> {
        let mut reader = Reader::from_reader(self.xml_bytes.as_slice());
        reader.config_mut().trim_text(true);
        reader
    }

    /// Returns the text of the first element with local name `element_name`
    /// that contains any text.
    ///
    /// All text events seen between the element's start tag and its matching
    /// end tag are concatenated, including text inside nested child elements.
    /// The bytes of each text event are appended as-is. Elements that turn
    /// out to hold no text are skipped and the search continues. A successful
    /// result is cached by `element_name`, so later calls with the same name
    /// return the cached string without re-parsing.
    ///
    /// Returns `Ok(None)` when no matching element with text exists.
    ///
    /// # Errors
    ///
    /// Returns `OpcError::XmlError` when the reader reports a parse error,
    /// and propagates the UTF-8 conversion error when a text event is not
    /// valid UTF-8.
    pub fn extract_text(&mut self, element_name: &str) -> Result<Option<String>> {
        if let Some(cached) = self.element_cache.get(element_name) {
            return Ok(Some(cached.clone()));
        }

        let mut reader = self.reader();
        let mut buf = Vec::new();
        let mut in_target_element = false;
        let mut text_content = String::new();
        let element_name_bytes = element_name.as_bytes();

        loop {
            match reader.read_event_into(&mut buf) {
                // Only a real start tag opens the element. A self-closing tag
                // (`Event::Empty`) has no text and no matching `End` event, so
                // treating it as "opened" would leave the flag set and pull in
                // text from unrelated elements that follow it.
                Ok(Event::Start(ref e)) => {
                    if e.local_name().as_ref() == element_name_bytes {
                        in_target_element = true;
                    }
                }
                Ok(Event::Text(e)) if in_target_element => {
                    let text = std::str::from_utf8(e.as_ref())?;
                    text_content.push_str(text);
                }
                Ok(Event::End(ref e)) => {
                    if e.local_name().as_ref() == element_name_bytes {
                        in_target_element = false;
                        // An element without text is not a hit; keep scanning
                        // for a later element of the same name.
                        if !text_content.is_empty() {
                            self.element_cache
                                .insert(element_name.to_string(), text_content.clone());
                            return Ok(Some(text_content));
                        }
                    }
                }
                Ok(Event::Eof) => break,
                Err(e) => return Err(OpcError::XmlError(format!("XML parse error: {}", e))),
                _ => {}
            }
            buf.clear();
        }

        Ok(None)
    }

    /// Collects the attributes of every element whose local name equals
    /// `element_name`, in document order.
    ///
    /// Both start tags and self-closing tags are matched. Each element
    /// contributes one map from attribute name (the key bytes as written in
    /// the tag) to its unescaped value.
    ///
    /// # Errors
    ///
    /// Returns `OpcError::XmlError` when the reader reports a parse error,
    /// and propagates errors from attribute parsing, UTF-8 conversion of an
    /// attribute name, or value unescaping.
    pub fn find_elements_with_attrs(
        &self,
        element_name: &str,
    ) -> Result<Vec<HashMap<String, String>>> {
        let mut reader = self.reader();
        let mut buf = Vec::new();
        let mut results = Vec::new();
        let element_name_bytes = element_name.as_bytes();

        loop {
            match reader.read_event_into(&mut buf) {
                Ok(Event::Start(ref e)) | Ok(Event::Empty(ref e)) => {
                    if e.local_name().as_ref() == element_name_bytes {
                        let mut attrs = HashMap::new();
                        for attr in e.attributes() {
                            let attr = attr?;
                            let key = std::str::from_utf8(attr.key.as_ref())?;
                            let value = attr.unescape_value()?;
                            attrs.insert(key.to_string(), value.to_string());
                        }
                        results.push(attrs);
                    }
                }
                Ok(Event::Eof) => break,
                Err(e) => return Err(OpcError::XmlError(format!("XML parse error: {}", e))),
                _ => {}
            }
            buf.clear();
        }

        Ok(results)
    }

    /// Returns the document as a string slice.
    ///
    /// # Errors
    ///
    /// Fails when the stored bytes are not valid UTF-8, which is possible
    /// for parts built with [`XmlPart::new`].
    pub fn xml_str(&self) -> Result<&str> {
        std::str::from_utf8(&self.xml_bytes).map_err(Into::into)
    }
}
/// `Part` accessors for [`XmlPart`]; each one exposes the matching field.
impl Part for XmlPart {
    /// Pack URI stored in this part.
    fn partname(&self) -> &PackURI {
        let Self { partname, .. } = self;
        partname
    }

    /// Content type string stored in this part.
    fn content_type(&self) -> &str {
        self.content_type.as_str()
    }

    /// The XML document bytes, viewed as a slice.
    fn blob(&self) -> &[u8] {
        self.xml_bytes.as_slice()
    }

    /// Shared access to the stored relationship collection.
    fn rels(&self) -> &Relationships {
        let Self { rels, .. } = self;
        rels
    }

    /// Exclusive access to the stored relationship collection.
    fn rels_mut(&mut self) -> &mut Relationships {
        let Self { rels, .. } = self;
        rels
    }
}
/// Chooses and constructs the concrete part type for a package member based
/// on its content type.
pub struct PartFactory;

impl PartFactory {
    /// Builds an [`XmlPart`] when `content_type` names an XML media type and
    /// a [`BlobPart`] otherwise.
    ///
    /// # Errors
    ///
    /// Propagates the error from [`XmlPart::load`] when an XML part's bytes
    /// are rejected. The blob path cannot fail.
    pub fn load(partname: PackURI, content_type: String, blob: Vec<u8>) -> Result<Box<dyn Part>> {
        if Self::is_xml_content_type(&content_type) {
            Ok(Box::new(XmlPart::load(partname, content_type, blob)?))
        } else {
            Ok(Box::new(BlobPart::load(partname, content_type, blob)))
        }
    }

    /// Reports whether `content_type` names an XML media type, i.e. its
    /// type/subtype ends in `+xml` or `/xml`.
    ///
    /// Media type names are case-insensitive and may be followed by
    /// `; name=value` parameters, so parameters and surrounding whitespace
    /// are ignored and the suffix is compared without regard to ASCII case.
    #[inline]
    fn is_xml_content_type(content_type: &str) -> bool {
        // `split` always yields at least one item; the fallback is never hit.
        let media_type = content_type
            .split(';')
            .next()
            .unwrap_or(content_type)
            .trim();

        // Compare on bytes so slicing can never land inside a multi-byte
        // character of a malformed content type.
        let bytes = media_type.as_bytes();
        ["+xml", "/xml"].iter().any(|suffix| {
            bytes.len() >= suffix.len()
                && bytes[bytes.len() - suffix.len()..].eq_ignore_ascii_case(suffix.as_bytes())
        })
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A blob part hands back the content type and bytes it was built with.
    #[test]
    fn test_blob_part() {
        let png_magic: Vec<u8> = vec![0x89, 0x50, 0x4E, 0x47];
        let uri = PackURI::new("/word/media/image1.png").unwrap();
        let part = BlobPart::new(uri, String::from("image/png"), png_magic.clone());

        assert_eq!(part.content_type(), "image/png");
        assert_eq!(part.blob(), &png_magic[..]);
    }

    /// Text inside a named element can be pulled out of an XML part.
    #[test]
    fn test_xml_part() {
        let uri = PackURI::new("/word/document.xml").unwrap();
        let markup = b"<root><text>Hello</text></root>".to_vec();
        let mut part = XmlPart::new(uri, String::from("application/xml"), markup);

        let extracted = part.extract_text("text").unwrap();
        assert_eq!(extracted.as_deref(), Some("Hello"));
    }

    /// `/xml` and `+xml` content types count as XML; an image type does not.
    #[test]
    fn test_is_xml_content_type() {
        let xml_types = [
            "application/xml",
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml",
        ];
        for content_type in xml_types {
            assert!(PartFactory::is_xml_content_type(content_type));
        }

        assert!(!PartFactory::is_xml_content_type("image/png"));
    }
}