use std::collections::HashSet;
use std::io::Cursor;
use quick_xml::events::Event;
use quick_xml::reader::Reader as XmlReader;
use zip::ZipArchive;
use crate::error::{TransformError, TransformErrorKind};
use crate::model::ExcelInput;
use crate::normalization::NormalizationOptions;
use super::super::invalid;
use super::super::workbook::selected_worksheet_path;
use super::super::worksheet::inspect_worksheet_xml;
use super::super::xml::local_name;
use super::read_zip_text;
pub(in crate::normalization::excel) fn preflight_xlsx_package(
bytes: &[u8],
excel: &ExcelInput,
options: &NormalizationOptions,
) -> Result<(), TransformError> {
let mut archive = ZipArchive::new(Cursor::new(bytes)).map_err(|err| {
TransformError::new(
TransformErrorKind::InvalidInput,
format!("invalid Excel ZIP package: {}", err),
)
})?;
if archive.len() > options.max_excel_zip_entries {
return Err(invalid("input exceeds max_excel_zip_entries"));
}
let mut total_uncompressed = 0usize;
let mut content_types = None;
let mut workbook_xml = None;
let mut workbook_rels = None;
let mut worksheet_count = 0usize;
let mut seen_entry_names = HashSet::new();
for index in 0..archive.len() {
let mut entry = archive.by_index(index).map_err(|err| {
TransformError::new(
TransformErrorKind::InvalidInput,
format!("failed to inspect Excel ZIP entry: {}", err),
)
})?;
let name = entry.name().to_string();
let lower_name = name.to_ascii_lowercase();
if !seen_entry_names.insert(lower_name.clone()) {
return Err(invalid("Excel ZIP entry names must be unique"));
}
let size =
usize::try_from(entry.size()).map_err(|_| invalid("Excel ZIP entry too large"))?;
if size > options.max_excel_entry_uncompressed_bytes {
return Err(invalid("input exceeds max_excel_entry_uncompressed_bytes"));
}
total_uncompressed = total_uncompressed
.checked_add(size)
.ok_or_else(|| invalid("input exceeds max_excel_uncompressed_bytes"))?;
if total_uncompressed > options.max_excel_uncompressed_bytes {
return Err(invalid("input exceeds max_excel_uncompressed_bytes"));
}
if lower_name.ends_with("vbaproject.bin") {
return Err(invalid("Excel macros are not supported"));
}
if lower_name == "[content_types].xml" {
content_types = Some(read_zip_text(&mut entry)?);
} else if lower_name == "xl/workbook.xml" {
workbook_xml = Some(read_zip_text(&mut entry)?);
} else if lower_name == "xl/_rels/workbook.xml.rels" {
let rels = read_zip_text(&mut entry)?;
reject_external_relationships(&rels)?;
workbook_rels = Some(rels);
} else if lower_name.starts_with("xl/worksheets/") && lower_name.ends_with(".xml") {
worksheet_count = worksheet_count.saturating_add(1);
if worksheet_count > options.max_excel_sheets {
return Err(invalid("input exceeds max_excel_sheets"));
}
} else if lower_name.ends_with(".rels") {
let rels = read_zip_text(&mut entry)?;
reject_external_relationships(&rels)?;
} else if lower_name == "xl/sharedstrings.xml" {
if size > options.max_excel_shared_string_bytes {
return Err(invalid("input exceeds max_excel_shared_string_bytes"));
}
let shared_strings = read_zip_text(&mut entry)?;
if count_xml_elements(&shared_strings, b"si")? > options.max_excel_shared_strings {
return Err(invalid("input exceeds max_excel_shared_strings"));
}
} else if lower_name == "xl/styles.xml" {
let styles = read_zip_text(&mut entry)?;
if count_xml_elements(&styles, b"xf")? > options.max_excel_styles {
return Err(invalid("input exceeds max_excel_styles"));
}
}
}
let content_types =
content_types.ok_or_else(|| invalid("Excel package is missing [Content_Types].xml"))?;
let lower_content_types = content_types.to_ascii_lowercase();
if lower_content_types.contains("macroenabled")
|| lower_content_types.contains("application/vnd.ms-excel.sheet.binary.macroenabled")
{
return Err(invalid("Excel macros are not supported"));
}
if !lower_content_types
.contains("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml")
{
return Err(invalid("only .xlsx workbooks are supported"));
}
let workbook_xml =
workbook_xml.ok_or_else(|| invalid("Excel package is missing workbook.xml"))?;
let workbook_rels =
workbook_rels.ok_or_else(|| invalid("Excel package is missing workbook relationships"))?;
let selected_worksheet_path = selected_worksheet_path(&workbook_xml, &workbook_rels, excel)?;
let mut selected_sheet = archive.by_name(&selected_worksheet_path).map_err(|err| {
TransformError::new(
TransformErrorKind::InvalidInput,
format!("failed to inspect selected Excel worksheet: {}", err),
)
})?;
let selected_sheet = read_zip_text(&mut selected_sheet)?;
let counts = inspect_worksheet_xml(&selected_sheet, excel.formula)?;
if counts.rows > options.max_excel_rows || counts.max_row > options.max_excel_rows {
return Err(invalid("input exceeds max_excel_rows"));
}
if counts.cells > options.max_excel_cells {
return Err(invalid("input exceeds max_excel_cells"));
}
let dense_cells = counts
.max_row
.checked_mul(counts.max_col)
.ok_or_else(|| invalid("input exceeds max_excel_cells"))?;
if dense_cells > options.max_excel_cells {
return Err(invalid("input exceeds max_excel_cells"));
}
Ok(())
}
/// Rejects relationship XML that appears to reference something outside the package.
///
/// The text is ASCII-lowercased and stripped of all ASCII whitespace, then
/// searched for `TargetMode` set to `External` or for a `Target` that starts
/// with an `http:`, `https:` or `file:` scheme, in either quote style.
///
/// # Errors
///
/// Returns an invalid-input error when any of those markers is found.
fn reject_external_relationships(rels: &str) -> Result<(), TransformError> {
    const EXTERNAL_MARKERS: [&str; 8] = [
        "targetmode=\"external\"",
        "targetmode='external'",
        "target=\"http:",
        "target=\"https:",
        "target=\"file:",
        "target='http:",
        "target='https:",
        "target='file:",
    ];

    // Normalise case and spacing so `Target = "HTTP:..."` is caught as well.
    let normalized: String = rels
        .chars()
        .filter(|ch| !ch.is_ascii_whitespace())
        .map(|ch| ch.to_ascii_lowercase())
        .collect();

    let is_external = EXTERNAL_MARKERS
        .iter()
        .any(|&marker| normalized.contains(marker));
    if is_external {
        Err(invalid("Excel external relationships are not supported"))
    } else {
        Ok(())
    }
}
/// Counts the elements in `xml` whose local name equals `name`.
///
/// Both opening tags and self-closing tags are counted; the local name is
/// obtained through the `local_name` helper. The count saturates instead of
/// overflowing.
///
/// # Errors
///
/// Returns an invalid-input error when the XML reader reports a parse failure.
fn count_xml_elements(xml: &str, name: &[u8]) -> Result<usize, TransformError> {
    let mut reader = XmlReader::from_str(xml);
    reader.trim_text(false);

    let mut matches = 0usize;
    loop {
        let event = reader.read_event().map_err(|err| {
            TransformError::new(
                TransformErrorKind::InvalidInput,
                format!("failed to parse Excel XML: {}", err),
            )
        })?;
        match event {
            Event::Eof => return Ok(matches),
            Event::Start(tag) | Event::Empty(tag)
                if local_name(tag.name().as_ref()) == name =>
            {
                matches = matches.saturating_add(1);
            }
            _ => {}
        }
    }
}