pub mod cell;
pub mod date;
pub mod edit;
pub mod error;
pub mod shared_strings;
pub mod styles;
pub mod text;
pub mod workbook;
pub mod worksheet;
pub mod write;
pub use cell::{Cell, CellRef, CellValue};
pub use date::DateTimeValue;
pub use error::{Result, XlsxError};
pub use shared_strings::SharedStringTable;
pub use styles::StyleSheet;
pub use workbook::{SheetState, WorkbookInfo};
pub use worksheet::{HyperlinkInfo, HyperlinkTarget, Worksheet};
use std::fs::File;
use std::io::{Read, Seek};
use std::path::Path;
use log::debug;
use zip::read::ZipArchive;
use crate::core::opc::{self, OpcReader};
use crate::core::relationships::{Relationships, rel_types};
use crate::core::theme::Theme;
/// In-memory representation of an XLSX workbook as produced by the loaders on this type
/// (`open`, `from_reader`, and friends).
#[derive(Debug, Clone)]
pub struct XlsxDocument {
/// Workbook-level information produced by `WorkbookInfo::parse` from the workbook part.
pub workbook: WorkbookInfo,
/// Parsed worksheets. The loaders in this file skip sheets that have an empty
/// relationship id or whose part cannot be read, so this may hold fewer entries than
/// `workbook.sheets`.
pub worksheets: Vec<Worksheet>,
/// Shared string table; the loaders substitute an empty table when the package has no
/// readable shared-strings part.
pub shared_strings: SharedStringTable,
/// Parsed stylesheet, when one is available. `ensure_styles` can populate this from
/// `styles_data` if it is still `None`.
pub styles: Option<StyleSheet>,
/// Parsed theme. The loaders in this file leave this as `None`; `ensure_theme` fills it
/// on demand from `theme_data`.
pub theme: Option<Theme>,
// Raw stylesheet bytes kept for a deferred parse by `ensure_styles`. The loaders visible
// in this file parse styles eagerly and always store `None` here.
styles_data: Option<Vec<u8>>,
// Raw theme bytes kept for a deferred parse by `ensure_theme`; consumed (taken) on the
// first parse attempt.
theme_data: Option<Vec<u8>>,
}
impl XlsxDocument {
    /// Returns the stylesheet, parsing the retained raw bytes on first use.
    ///
    /// If `styles` is already populated it is returned as is. Otherwise the bytes held in
    /// `styles_data` (if any) are consumed and parsed; a parse failure is discarded and
    /// yields `None`. Because the bytes are taken, a failed parse is not retried on later
    /// calls.
    pub fn ensure_styles(&mut self) -> Option<&StyleSheet> {
        if self.styles.is_some() {
            return self.styles.as_ref();
        }
        // No retained bytes means there is nothing to parse: report `None`.
        let raw = self.styles_data.take()?;
        self.styles = StyleSheet::parse(&raw).ok();
        self.styles.as_ref()
    }

    /// Returns the theme, parsing the retained raw bytes on first use.
    ///
    /// If `theme` is already populated it is returned as is. Otherwise the bytes held in
    /// `theme_data` (if any) are consumed and parsed; a parse failure is discarded and
    /// yields `None`. Because the bytes are taken, a failed parse is not retried on later
    /// calls.
    pub fn ensure_theme(&mut self) -> Option<&Theme> {
        if self.theme.is_some() {
            return self.theme.as_ref();
        }
        // No retained bytes means there is nothing to parse: report `None`.
        let raw = self.theme_data.take()?;
        self.theme = Theme::parse(&raw).ok();
        self.theme.as_ref()
    }
}
impl XlsxDocument {
pub fn open(path: impl AsRef<Path>) -> Result<Self> {
let file = File::open(path).map_err(crate::core::Error::from)?;
let archive = ZipArchive::new(file).map_err(crate::core::Error::from)?;
Self::from_zip(archive)
}
/// Opens the XLSX file at `path` through a read-only memory map and loads it.
///
/// Only compiled when the `mmap` feature is enabled.
///
/// # Errors
/// Returns an error if the file cannot be opened or mapped, is not a readable ZIP
/// archive, or the workbook inside fails to load (see `from_zip`).
#[cfg(feature = "mmap")]
pub fn open_mmap(path: impl AsRef<Path>) -> Result<Self> {
    let handle = File::open(path).map_err(crate::core::Error::from)?;
    // SAFETY: `Mmap::map` is unsafe because the mapped bytes are only valid while the
    // underlying file is left unmodified. Nothing here can enforce that; this relies on
    // the file not being written or truncated during the load. The mapping is moved into
    // the archive, which `from_zip` consumes, and the returned document owns all of its
    // data, so the mapping does not outlive this call.
    let mapped = unsafe { memmap2::Mmap::map(&handle) }.map_err(crate::core::Error::from)?;
    debug!("XLSX fast path: mmap opened ({} bytes)", mapped.len());
    // `from_reader` wraps the cursor in a `ZipArchive` and hands it to `from_zip`.
    Self::from_reader(std::io::Cursor::new(mapped))
}
pub fn from_reader<R: Read + Seek>(reader: R) -> Result<Self> {
let archive = ZipArchive::new(reader).map_err(crate::core::Error::from)?;
Self::from_zip(archive)
}
/// Reads the ZIP entry `name` and, for XML-like parts, returns it re-encoded as UTF-8.
///
/// Entries whose name ends in `.xml` or `.rels` are passed through
/// `crate::core::xml::ensure_utf8`; when that yields replacement bytes they are returned,
/// otherwise the bytes are returned exactly as read. All other entries are returned
/// unchanged.
///
/// # Errors
/// Propagates the error from `opc::read_zip_entry` when the entry cannot be read.
fn read_xml_entry<R: Read + Seek>(
    archive: &mut ZipArchive<R>,
    name: &str,
) -> std::result::Result<Vec<u8>, crate::core::Error> {
    let raw = opc::read_zip_entry(archive, name)?;
    let is_xml_like = name.ends_with(".xml") || name.ends_with(".rels");
    let reencoded = if is_xml_like {
        crate::core::xml::ensure_utf8(&raw)
    } else {
        None
    };
    // Fall back to the bytes as read when no re-encoded copy was produced.
    Ok(reencoded.unwrap_or(raw))
}
/// Builds a document by reading conventionally named parts directly from the ZIP archive.
///
/// Load order: workbook relationships, shared strings, styles, theme bytes, workbook,
/// then every worksheet listed in the workbook together with its relationships part.
/// Worksheet bytes are gathered first and parsed afterwards through
/// `crate::core::parallel::map_collect`. The theme is kept as raw bytes and only parsed
/// by `ensure_theme`.
///
/// Conditions that are tolerated and silently defaulted:
/// - unreadable workbook relationships or shared strings: empty tables are used;
/// - unreadable or unparsable styles: `styles` is `None`;
/// - unreadable theme: no theme bytes are retained;
/// - a sheet with an empty relationship id, or whose XML cannot be read at either the
///   declared path or the positional fallback path: the sheet is skipped;
/// - unreadable or unparsable worksheet relationships: empty relationships are used.
///
/// # Errors
/// Fails when `xl/workbook.xml` cannot be read or parsed, when the workbook
/// relationships or shared strings are readable but fail to parse, or when any
/// worksheet fails to parse.
fn from_zip<R: Read + Seek>(mut archive: ZipArchive<R>) -> Result<Self> {
    debug!("XlsxDocument: fast path parsing started ({} ZIP entries)", archive.len());

    let wb_rels = match Self::read_xml_entry(&mut archive, "xl/_rels/workbook.xml.rels") {
        Ok(data) => Relationships::parse(&data)?,
        Err(_) => Relationships::empty(),
    };
    let shared_strings = match Self::read_xml_entry(&mut archive, "xl/sharedStrings.xml") {
        Ok(data) => SharedStringTable::parse(&data)?,
        Err(_) => SharedStringTable::empty(),
    };
    let styles = match Self::read_xml_entry(&mut archive, "xl/styles.xml") {
        Ok(data) => StyleSheet::parse(&data).ok(),
        Err(_) => None,
    };
    let theme_data = Self::read_xml_entry(&mut archive, "xl/theme/theme1.xml").ok();
    let wb_data = Self::read_xml_entry(&mut archive, "xl/workbook.xml")?;
    let workbook = WorkbookInfo::parse(&wb_data)?;

    // Raw inputs for one worksheet, collected up front so parsing can run as a batch.
    struct SheetBundle {
        name: String,
        data: Vec<u8>,
        rels: Relationships,
    }

    let mut bundles = Vec::with_capacity(workbook.sheets.len());
    for sheet in &workbook.sheets {
        if sheet.rel_id.is_empty() {
            continue;
        }

        // Path declared by the workbook relationships. A leading '/' marks a target that
        // is already rooted at the package; anything else is taken relative to `xl/`.
        // Without a matching relationship, guess the conventional positional name.
        let declared_path = if let Some(rel) = wb_rels.get_by_id(&sheet.rel_id) {
            let target = &rel.target;
            if let Some(stripped) = target.strip_prefix('/') {
                stripped.to_string()
            } else {
                format!("xl/{}", target)
            }
        } else {
            let idx = bundles.len() + 1;
            format!("xl/worksheets/sheet{}.xml", idx)
        };

        // Keep the path that was actually read next to the bytes: the relationships part
        // lives beside the worksheet part, so it must be derived from the path that
        // succeeded rather than from the declared path that may have failed.
        let primary = Self::read_xml_entry(&mut archive, &declared_path);
        let (sheet_path, ws_data) = match primary {
            Ok(data) => (declared_path, data),
            Err(_) => {
                let idx = bundles.len() + 1;
                let alt = format!("xl/worksheets/sheet{}.xml", idx);
                match Self::read_xml_entry(&mut archive, &alt) {
                    Ok(data) => (alt, data),
                    Err(_) => continue,
                }
            },
        };

        let rels_path = sheet_rels_path(&sheet_path);
        let ws_rels = match Self::read_xml_entry(&mut archive, &rels_path) {
            Ok(data) => Relationships::parse(&data).unwrap_or_else(|_| Relationships::empty()),
            Err(_) => Relationships::empty(),
        };

        bundles.push(SheetBundle {
            name: sheet.name.clone(),
            data: ws_data,
            rels: ws_rels,
        });
    }

    let worksheets = crate::core::parallel::map_collect(bundles, |b| -> Result<Worksheet> {
        let ws = Worksheet::parse(&b.data, b.name, &b.rels)?;
        Ok(ws)
    })?;
    debug!("XlsxDocument: {} worksheets parsed", worksheets.len());

    Ok(XlsxDocument {
        workbook,
        worksheets,
        shared_strings,
        styles,
        theme: None,
        // Styles were parsed eagerly above, so no raw bytes are retained for them.
        styles_data: None,
        theme_data,
    })
}
/// Builds a document by following the package relationships exposed by an `OpcReader`.
///
/// Shared strings, theme and styles are located through the workbook's relationships by
/// type, and each worksheet through its relationship id. The theme is kept as raw bytes
/// and only parsed by `ensure_theme`.
///
/// Conditions that are tolerated and silently defaulted:
/// - no shared-strings, theme or styles relationship: empty table / `None`;
/// - theme part that cannot be read: no theme bytes are retained;
/// - a sheet with an empty relationship id, whose target cannot be resolved and has no
///   existing fallback part, or whose part cannot be read: the sheet is skipped;
/// - worksheet relationships that cannot be read: empty relationships are used.
///
/// Unlike `from_zip`, a styles part that is referenced but cannot be read or parsed
/// aborts the load.
///
/// # Errors
/// Fails when the main document part or its relationships cannot be determined, when a
/// referenced shared-strings/theme/styles target cannot be resolved, when shared strings
/// or styles cannot be read or parsed, when the workbook cannot be read or parsed, or
/// when any worksheet fails to parse.
// NOTE(review): the `dead_code` allowance suggests this loader currently has no callers
// in the crate — confirm before relying on it.
#[allow(dead_code)]
pub(crate) fn from_opc<R: Read + Seek>(mut opc: OpcReader<R>) -> Result<Self> {
    debug!("XlsxDocument: OPC parsing started");
    let main_part = opc.main_document_part()?;
    let wb_rels = opc.read_rels_for(&main_part)?;

    let shared_strings = if let Some(rel) = wb_rels.first_by_type(rel_types::SHARED_STRINGS) {
        let part_name = main_part.resolve_relative(&rel.target)?;
        let data = opc.read_part(&part_name)?;
        SharedStringTable::parse(&data)?
    } else {
        SharedStringTable::empty()
    };
    let theme_data = if let Some(rel) = wb_rels.first_by_type(rel_types::THEME) {
        let part_name = main_part.resolve_relative(&rel.target)?;
        opc.read_part(&part_name).ok()
    } else {
        None
    };
    let styles = if let Some(rel) = wb_rels.first_by_type(rel_types::STYLES) {
        let part_name = main_part.resolve_relative(&rel.target)?;
        let data = opc.read_part(&part_name)?;
        Some(StyleSheet::parse(&data)?)
    } else {
        None
    };
    let wb_data = opc.read_part(&main_part)?;
    let workbook = WorkbookInfo::parse(&wb_data)?;

    // Raw inputs for one worksheet, collected up front so parsing can run as a batch.
    struct SheetBundle {
        name: String,
        data: Vec<u8>,
        rels: Relationships,
    }

    let mut bundles = Vec::with_capacity(workbook.sheets.len());
    for sheet in &workbook.sheets {
        if sheet.rel_id.is_empty() {
            continue;
        }
        let part_name = match wb_rels.resolve_target(&sheet.rel_id, &main_part) {
            Ok(pn) => pn,
            Err(_) => {
                // The relationship did not resolve: try the conventional part names,
                // first by position among the sheets collected so far, then by sheet id.
                let idx = bundles.len() + 1;
                let candidates = [
                    format!("/xl/worksheets/sheet{}.xml", idx),
                    format!("/xl/worksheets/sheet{}.xml", sheet.sheet_id),
                ];
                match candidates.iter().find_map(|c| {
                    crate::core::opc::PartName::new(c)
                        .ok()
                        .filter(|pn| opc.has_part(pn))
                }) {
                    Some(pn) => {
                        debug!("worksheet fallback: '{}' -> '{}'", sheet.rel_id, pn);
                        pn
                    },
                    None => continue,
                }
            },
        };
        let ws_rels = opc
            .read_rels_for(&part_name)
            .unwrap_or_else(|_| Relationships::empty());
        let ws_data = match opc.read_part(&part_name) {
            Ok(data) => data,
            Err(_) => continue,
        };
        bundles.push(SheetBundle {
            name: sheet.name.clone(),
            data: ws_data,
            rels: ws_rels,
        });
    }

    // Use the same crate helper as `from_zip` instead of duplicating the
    // feature-gated parallel/sequential dispatch here.
    let worksheets = crate::core::parallel::map_collect(bundles, |b| -> Result<Worksheet> {
        let ws = Worksheet::parse(&b.data, b.name, &b.rels)?;
        Ok(ws)
    })?;
    debug!("XlsxDocument: {} worksheets parsed", worksheets.len());

    Ok(XlsxDocument {
        workbook,
        worksheets,
        shared_strings,
        styles,
        theme: None,
        // Styles were parsed eagerly above, so no raw bytes are retained for them.
        styles_data: None,
        theme_data,
    })
}
}
/// Derives the path of the relationships part that accompanies `sheet_path`.
///
/// The relationships file sits in a `_rels` directory beside the part and carries the
/// part's file name with `.rels` appended, e.g. `xl/worksheets/sheet1.xml` becomes
/// `xl/worksheets/_rels/sheet1.xml.rels`. A path without any `/` maps to
/// `_rels/<path>.rels`.
fn sheet_rels_path(sheet_path: &str) -> String {
    // Split at the last '/' so only the final segment is treated as the file name.
    match sheet_path.rsplit_once('/') {
        Some((parent, leaf)) => format!("{}/_rels/{}.rels", parent, leaf),
        None => format!("_rels/{}.rels", sheet_path),
    }
}
impl crate::core::OfficeDocument for XlsxDocument {
fn plain_text(&self) -> String {
self.plain_text()
}
fn to_markdown(&self) -> String {
self.to_markdown()
}
}