use std::io::{Cursor, Read, Seek};
use quick_xml::Reader;
use quick_xml::events::Event;
use crate::models::{AuditSignal, DocumentAudit, DocumentType, Extraction, OutputWarning};
use crate::parsers::{
attr_value, name_eq, parent_dir, parse_relationships, resolve_relationship_target,
};
use crate::vfs::OoxmlPackage;
use crate::{OxdocError, Result};
use super::{detect_document_type, metadata};
pub(crate) fn read_audit<R: Read + Seek>(
package: &mut OoxmlPackage<R>,
file_name: String,
) -> Result<Extraction<DocumentAudit>> {
let document_type = detect_document_type(package)?;
let info = metadata::read_info(package, file_name.clone())?;
let mut warnings = info.warnings;
let mut signals = Vec::new();
if info.value.has_macros {
signals.push(AuditSignal::new(
"macros",
"high",
"[Content_Types].xml",
"VBA macro content is present or declared",
));
}
if let Some(custom_properties) = &info.value.custom_properties
&& !custom_properties.is_empty()
{
signals.push(AuditSignal::new(
"custom_properties",
"info",
"docProps/custom.xml",
format!(
"{} custom document properties are present",
custom_properties.len()
),
));
}
if matches!(document_type, DocumentType::Xlsx)
&& let Some(hidden) = audit_hidden_xlsx_sheets(package)?
{
warnings.extend(hidden.warnings);
signals.extend(hidden.value);
}
let relationship_signals = audit_relationship_targets(package)?;
warnings.extend(relationship_signals.warnings);
signals.extend(relationship_signals.value);
signals.extend(warnings.iter().map(warning_signal));
Ok(Extraction::with_warnings(
DocumentAudit {
file: file_name,
document_type: document_type_name(document_type).to_owned(),
metadata: info.value,
signals,
},
warnings,
))
}
fn audit_hidden_xlsx_sheets<R: Read + Seek>(
package: &mut OoxmlPackage<R>,
) -> Result<Option<Extraction<Vec<AuditSignal>>>> {
if !package.contains("xl/workbook.xml") {
return Ok(None);
}
let workbook_xml = package.read_to_string("xl/workbook.xml")?;
parse_hidden_xlsx_sheets(&workbook_xml, "xl/workbook.xml").map(Some)
}
fn parse_hidden_xlsx_sheets(xml: &str, path: &str) -> Result<Extraction<Vec<AuditSignal>>> {
let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes()));
reader.config_mut().trim_text(true);
let mut buf = Vec::new();
let mut signals = Vec::new();
let mut warnings = Vec::new();
loop {
match reader.read_event_into(&mut buf) {
Ok(Event::Start(element)) | Ok(Event::Empty(element))
if name_eq(element.name().as_ref(), b"sheet") =>
{
let state = attr_value(&element, b"state").unwrap_or_default();
if state.eq_ignore_ascii_case("hidden") || state.eq_ignore_ascii_case("veryHidden")
{
let name = attr_value(&element, b"name").unwrap_or_else(|| "<unnamed>".into());
signals.push(AuditSignal::new(
"hidden_sheet",
"warning",
path,
format!("worksheet '{name}' is {state}"),
));
}
}
Ok(Event::Eof) => break,
Err(source) => {
warnings.push(OutputWarning::malformed_xml(path, source));
break;
}
_ => {}
}
buf.clear();
}
Ok(Extraction::with_warnings(signals, warnings))
}
fn audit_relationship_targets<R: Read + Seek>(
package: &mut OoxmlPackage<R>,
) -> Result<Extraction<Vec<AuditSignal>>> {
let mut signals = Vec::new();
let mut warnings = Vec::new();
let mut relationship_paths = package
.part_names()
.into_iter()
.filter(|path| path.ends_with(".rels"))
.collect::<Vec<_>>();
relationship_paths.sort();
for relationship_path in relationship_paths {
let xml = package.read_to_string(&relationship_path)?;
let relationships = match parse_relationships(&xml, &relationship_path) {
Ok(relationships) => relationships,
Err(OxdocError::MalformedXmlNode { source, .. }) => {
warnings.push(OutputWarning::malformed_xml(&relationship_path, source));
continue;
}
Err(err) => return Err(err),
};
for relationship in relationships {
if let Err(err) = resolve_relationship_target(
&relationship_base_dir(&relationship_path),
&relationship,
&relationship_path,
) {
match err {
OxdocError::SuspiciousRelationshipTarget { target, reason, .. } => {
signals.push(AuditSignal::new(
"relationship_target",
"warning",
&relationship_path,
format!("relationship target '{target}' is suspicious: {reason}"),
));
}
err => return Err(err),
}
}
}
}
Ok(Extraction::with_warnings(signals, warnings))
}
fn relationship_base_dir(relationship_path: &str) -> String {
if relationship_path == "_rels/.rels" {
return String::new();
}
if let Some((prefix, file)) = relationship_path.rsplit_once("/_rels/") {
let source = file.trim_end_matches(".rels");
if prefix.is_empty() {
return parent_dir(source).to_owned();
}
if source.is_empty() {
return prefix.to_owned();
}
return parent_dir(&format!("{prefix}/{source}")).to_owned();
}
parent_dir(relationship_path).to_owned()
}
fn warning_signal(warning: &OutputWarning) -> AuditSignal {
AuditSignal::new(
"parser_warning",
"warning",
&warning.path,
format!(
"warning[{}/{}]: {}",
warning.category().as_str(),
warning.code().as_str(),
warning.message
),
)
}
fn document_type_name(document_type: DocumentType) -> &'static str {
match document_type {
DocumentType::Docx => "docx",
DocumentType::Pptx => "pptx",
DocumentType::Xlsx => "xlsx",
DocumentType::Unknown => "unknown",
}
}
#[cfg(test)]
mod tests {
use super::relationship_base_dir;
#[test]
fn derives_relationship_base_dirs() {
assert_eq!(relationship_base_dir("_rels/.rels"), "");
assert_eq!(
relationship_base_dir("word/_rels/document.xml.rels"),
"word"
);
assert_eq!(
relationship_base_dir("ppt/slides/_rels/slide1.xml.rels"),
"ppt/slides"
);
}
}