use std::borrow::Cow;
use quick_xml::NsReader;
use quick_xml::events::BytesStart;
use quick_xml::name::{Namespace, ResolveResult};
use super::error::{Error, Result};
/// XML namespace URIs referenced by the helpers in this file.
///
/// Most URIs are exposed as byte slices; a few markup namespaces are also
/// available as `&str` (`*_STR`). The `STRICT_*` constants hold the
/// `purl.oclc.org/ooxml` URIs that `strict_alternate` pairs with the
/// corresponding `schemas.openxmlformats.org` ones.
pub mod ns {
    // Package-level namespaces.
    pub const CONTENT_TYPES: &[u8] =
        b"http://schemas.openxmlformats.org/package/2006/content-types";
    pub const RELATIONSHIPS: &[u8] =
        b"http://schemas.openxmlformats.org/package/2006/relationships";
    pub const CORE_PROPERTIES: &[u8] =
        b"http://schemas.openxmlformats.org/package/2006/metadata/core-properties";
    pub const EXTENDED_PROPERTIES: &[u8] =
        b"http://schemas.openxmlformats.org/officeDocument/2006/extended-properties";

    // Dublin Core namespaces.
    pub const DC: &[u8] = b"http://purl.org/dc/elements/1.1/";
    pub const DC_TERMS: &[u8] = b"http://purl.org/dc/terms/";

    // Markup namespaces, string form.
    pub const WML_STR: &str = "http://schemas.openxmlformats.org/wordprocessingml/2006/main";
    pub const SML_STR: &str = "http://schemas.openxmlformats.org/spreadsheetml/2006/main";
    pub const PML_STR: &str = "http://schemas.openxmlformats.org/presentationml/2006/main";
    pub const DRAWING_ML_STR: &str = "http://schemas.openxmlformats.org/drawingml/2006/main";
    pub const R_STR: &str = "http://schemas.openxmlformats.org/officeDocument/2006/relationships";

    // Markup namespaces, byte form (same URIs as the `*_STR` constants).
    pub const WML: &[u8] = WML_STR.as_bytes();
    pub const SML: &[u8] = SML_STR.as_bytes();
    pub const PML: &[u8] = PML_STR.as_bytes();
    pub const DRAWING_ML: &[u8] = DRAWING_ML_STR.as_bytes();
    pub const R: &[u8] = R_STR.as_bytes();

    // `purl.oclc.org/ooxml` alternates of the markup namespaces above.
    pub const STRICT_WML: &[u8] = b"http://purl.oclc.org/ooxml/wordprocessingml/main";
    pub const STRICT_SML: &[u8] = b"http://purl.oclc.org/ooxml/spreadsheetml/main";
    pub const STRICT_PML: &[u8] = b"http://purl.oclc.org/ooxml/presentationml/main";
    pub const STRICT_DRAWING: &[u8] = b"http://purl.oclc.org/ooxml/drawingml/main";
    pub const STRICT_R: &[u8] = b"http://purl.oclc.org/ooxml/officeDocument/relationships";
}
/// Looks up the `STRICT_*` namespace URI paired with `ns`.
///
/// Returns `Some(uri)` when `ns` is one of the five markup namespaces that
/// have a strict counterpart in [`ns`], and `None` for any other input.
fn strict_alternate(ns: &[u8]) -> Option<&'static [u8]> {
    // (namespace, strict alternate) pairs; the first matching entry wins.
    const ALTERNATES: [(&[u8], &[u8]); 5] = [
        (ns::WML, ns::STRICT_WML),
        (ns::SML, ns::STRICT_SML),
        (ns::PML, ns::STRICT_PML),
        (ns::DRAWING_ML, ns::STRICT_DRAWING),
        (ns::R, ns::STRICT_R),
    ];

    ALTERNATES
        .iter()
        .find(|(known, _)| *known == ns)
        .map(|&(_, strict)| strict)
}
/// Tests whether a start tag has the given local name and namespace.
///
/// `resolve` is the namespace resolution result for `start`. The tag matches
/// when its local name equals `local` and the resolved namespace is bound to
/// either `ns` itself or the strict alternate of `ns` (if one exists).
/// Unbound or unknown-prefix resolutions never match.
pub fn matches_start(resolve: &ResolveResult, start: &BytesStart, ns: &[u8], local: &[u8]) -> bool {
    if start.local_name().as_ref() != local {
        return false;
    }
    let ResolveResult::Bound(Namespace(bound)) = resolve else {
        return false;
    };
    *bound == ns || strict_alternate(ns).is_some_and(|strict| *bound == strict)
}
/// Tests whether a namespace resolution result refers to `ns`.
///
/// Returns `true` only for a bound namespace whose URI equals `ns` or the
/// strict alternate of `ns`; every other resolution result yields `false`.
pub fn matches_ns(resolve: &ResolveResult, ns: &[u8]) -> bool {
    matches!(
        resolve,
        ResolveResult::Bound(Namespace(bound))
            if *bound == ns || strict_alternate(ns).is_some_and(|strict| *bound == strict)
    )
}
pub fn required_attr<'a>(event: &'a BytesStart, key: &[u8]) -> Result<Cow<'a, [u8]>> {
match event.try_get_attribute(key)? {
Some(attr) => Ok(attr.value),
None => Err(Error::MissingAttribute {
element: String::from_utf8_lossy(event.local_name().as_ref()).into_owned(),
attr: String::from_utf8_lossy(key).into_owned(),
}),
}
}
/// Fetches a mandatory attribute and validates its raw bytes as UTF-8.
///
/// A borrowed value stays borrowed; an owned value is converted in place.
///
/// NOTE(review): the value is returned as stored in the tag — no entity
/// unescaping happens here, unlike `optional_prefixed_attr_str`. Confirm
/// that callers do not expect `&amp;`-style references to be resolved.
///
/// # Errors
///
/// Fails when the attribute is missing, the lookup fails, or the value is
/// not valid UTF-8.
pub fn required_attr_str<'a>(event: &'a BytesStart, key: &[u8]) -> Result<Cow<'a, str>> {
    let text = match required_attr(event, key)? {
        Cow::Borrowed(bytes) => Cow::Borrowed(std::str::from_utf8(bytes)?),
        Cow::Owned(bytes) => {
            let owned = String::from_utf8(bytes).map_err(|err| err.utf8_error())?;
            Cow::Owned(owned)
        }
    };
    Ok(text)
}
/// Fetches the raw value of an attribute that may be absent.
///
/// Returns `Ok(None)` when the tag has no attribute named `key`.
///
/// # Errors
///
/// Propagates any error from the attribute lookup.
pub fn optional_attr<'a>(event: &'a BytesStart, key: &[u8]) -> Result<Option<Cow<'a, [u8]>>> {
    match event.try_get_attribute(key)? {
        Some(attr) => Ok(Some(attr.value)),
        None => Ok(None),
    }
}
/// Fetches an optional attribute and validates its raw bytes as UTF-8.
///
/// Returns `Ok(None)` when the attribute is absent. A borrowed value stays
/// borrowed; an owned value is converted in place. No entity unescaping is
/// performed on the value.
///
/// # Errors
///
/// Fails when the lookup fails or the value is not valid UTF-8.
pub fn optional_attr_str<'a>(event: &'a BytesStart, key: &[u8]) -> Result<Option<Cow<'a, str>>> {
    let Some(raw) = optional_attr(event, key)? else {
        return Ok(None);
    };
    let text = match raw {
        Cow::Borrowed(bytes) => Cow::Borrowed(std::str::from_utf8(bytes)?),
        Cow::Owned(bytes) => {
            Cow::Owned(String::from_utf8(bytes).map_err(|err| err.utf8_error())?)
        }
    };
    Ok(Some(text))
}
/// Finds an attribute by local name, ignoring whatever prefix it carries.
///
/// For each attribute the part of the key after the first `:` (or the whole
/// key when there is no `:`) is compared with `local_name`; the first match
/// is returned with its value unescaped. Attributes that fail to parse are
/// skipped silently.
///
/// # Errors
///
/// Fails only when unescaping the matching attribute's value fails.
pub fn optional_prefixed_attr_str<'a>(
    event: &'a BytesStart,
    local_name: &[u8],
) -> Result<Option<Cow<'a, str>>> {
    let hit = event.attributes().flatten().find(|attr| {
        let key = attr.key.as_ref();
        // Strip everything up to and including the first colon, if any.
        let local = match key.iter().position(|&b| b == b':') {
            Some(colon) => &key[colon + 1..],
            None => key,
        };
        local == local_name
    });

    match hit {
        Some(attr) => Ok(Some(attr.unescape_value()?)),
        None => Ok(None),
    }
}
pub fn parse_toggle(e: &BytesStart, attr_name: &[u8]) -> bool {
match optional_attr_str(e, attr_name) {
Ok(Some(ref val)) => !matches!(val.as_ref(), "0" | "false" | "off"),
_ => true,
}
}
/// Collects the text inside the current element, including nested elements.
///
/// The nesting counter starts at 1, so the caller is presumably expected to
/// have consumed the element's start tag already. Text events are unescaped,
/// CDATA sections must be valid UTF-8, and all other events are ignored.
/// Reading stops at the end tag that balances the counter, or at end of
/// input, in which case whatever was collected so far is returned.
///
/// # Errors
///
/// Propagates reader errors, unescape errors and CDATA UTF-8 errors.
pub fn read_text_content(reader: &mut NsReader<&[u8]>) -> Result<String> {
    use quick_xml::events::Event;

    let mut collected = String::new();
    let mut open_elements = 1u32;
    while open_elements > 0 {
        match reader.read_event()? {
            Event::Text(chunk) => collected.push_str(&chunk.unescape()?),
            Event::CData(chunk) => collected.push_str(std::str::from_utf8(&chunk)?),
            Event::Start(_) => open_elements += 1,
            Event::End(_) => open_elements -= 1,
            Event::Eof => break,
            _ => {}
        }
    }
    Ok(collected)
}
/// Consumes events until the current element is closed.
///
/// The nesting counter starts at 1, so the caller is presumably expected to
/// have consumed the element's start tag already. Nested start/end tags
/// adjust the counter; everything else is discarded. Hitting end of input
/// before the element closes is not reported as an error.
///
/// # Errors
///
/// Propagates reader errors.
pub fn skip_element(reader: &mut NsReader<&[u8]>) -> Result<()> {
    use quick_xml::events::Event;

    let mut open_elements = 1u32;
    while open_elements > 0 {
        match reader.read_event()? {
            Event::Start(_) => open_elements += 1,
            Event::End(_) => open_elements -= 1,
            Event::Eof => break,
            _ => {}
        }
    }
    Ok(())
}
/// Builds a namespace-aware reader over `xml` with this file's settings.
///
/// Text trimming is switched on, and end-tag name checking and comment
/// checking are switched off.
///
/// NOTE(review): per quick_xml's `Config` docs, `trim_text(true)` strips
/// leading and trailing whitespace from text events — confirm no caller
/// relies on whitespace-significant text being preserved.
pub fn make_reader(xml: &[u8]) -> NsReader<&[u8]> {
    let mut reader = NsReader::from_reader(xml);
    {
        let settings = reader.config_mut();
        settings.check_comments = false;
        settings.check_end_names = false;
        settings.trim_text(true);
    }
    reader
}
/// Builds a plain (non-namespace-resolving) reader over `xml`.
///
/// Applies the same settings as `make_reader`: text trimming on, end-tag
/// name checking off, comment checking off.
pub fn make_fast_reader(xml: &[u8]) -> quick_xml::Reader<&[u8]> {
    let mut reader = quick_xml::Reader::from_reader(xml);
    {
        let settings = reader.config_mut();
        settings.check_comments = false;
        settings.check_end_names = false;
        settings.trim_text(true);
    }
    reader
}
/// Collects the text inside the current element using the plain reader.
///
/// Same traversal as `read_text_content`: the nesting counter starts at 1,
/// text events are unescaped, nested elements are walked through, and
/// reading stops at the balancing end tag or at end of input.
///
/// NOTE(review): CDATA is decoded lossily here (invalid UTF-8 becomes
/// U+FFFD), whereas `read_text_content` rejects invalid UTF-8 in CDATA.
/// Confirm whether that difference is intentional.
///
/// # Errors
///
/// Propagates reader errors and unescape errors.
pub fn read_text_content_fast(reader: &mut quick_xml::Reader<&[u8]>) -> Result<String> {
    use quick_xml::events::Event;

    let mut collected = String::new();
    let mut open_elements = 1u32;
    while open_elements > 0 {
        match reader.read_event()? {
            Event::Text(chunk) => collected.push_str(&chunk.unescape()?),
            Event::CData(chunk) => collected.push_str(&String::from_utf8_lossy(&chunk)),
            Event::Start(_) => open_elements += 1,
            Event::End(_) => open_elements -= 1,
            Event::Eof => break,
            _ => {}
        }
    }
    Ok(collected)
}
/// Consumes events from the plain reader until the current element closes.
///
/// The nesting counter starts at 1, so the caller is presumably expected to
/// have consumed the element's start tag already. End of input before the
/// element closes is not reported as an error.
///
/// # Errors
///
/// Propagates reader errors.
pub fn skip_element_fast(reader: &mut quick_xml::Reader<&[u8]>) -> Result<()> {
    use quick_xml::events::Event;

    let mut open_elements = 1u32;
    while open_elements > 0 {
        match reader.read_event()? {
            Event::Start(_) => open_elements += 1,
            Event::End(_) => open_elements -= 1,
            Event::Eof => break,
            _ => {}
        }
    }
    Ok(())
}
pub fn ensure_utf8(data: &[u8]) -> Option<Vec<u8>> {
if std::str::from_utf8(data).is_ok() {
return None;
}
let header = &data[..data.len().min(200)];
let header_str = String::from_utf8_lossy(header);
let encoding_name = if let Some(pos) = header_str.find("encoding=") {
let rest = &header_str[pos + 9..];
let quote = rest.as_bytes().first().copied().unwrap_or(b'"');
if quote == b'"' || quote == b'\'' {
let inner = &rest[1..];
inner.split(quote as char).next().unwrap_or("utf-8")
} else {
return None;
}
} else {
"iso-8859-1"
};
let encoding = encoding_rs::Encoding::for_label(encoding_name.as_bytes())?;
if encoding == encoding_rs::UTF_8 {
return None;
}
let (result, _, had_errors) = encoding.decode(data);
if had_errors {
return None;
}
let mut utf8 = result.into_owned().into_bytes();
if let Some(pos) = utf8
.windows(9)
.position(|w| w.eq_ignore_ascii_case(b"encoding="))
{
let rest = &utf8[pos + 9..];
if let Some("e) = rest.first() {
if quote == b'"' || quote == b'\'' {
if let Some(end) = rest[1..].iter().position(|&b| b == quote) {
let start = pos + 10;
let end = start + end;
utf8.splice(start..end, b"utf-8".iter().copied());
}
}
}
}
Some(utf8)
}