use std::collections::HashMap;
use std::fs::File;
use std::path::{Path, PathBuf};
use calamine_styles::{open_workbook_auto, Reader, Sheets};
use zip::ZipArchive;
use crate::csv_reader::CsvBackend;
use crate::error::{Error, Result};
use crate::map::{classify_sheet, SheetMap, WorkbookMap};
use crate::ooxml::{
join_and_normalize, parse_relationship_targets, parse_workbook_sheet_rids, zip_read_to_string,
zip_read_to_string_opt,
};
use crate::sheet::{Sheet, SheetsReader};
use crate::styles::{parse_cellxfs, parse_num_fmts, XfEntry};
use crate::worksheet_xml::parse_cell_style_ids;
/// File format of a workbook source, as detected from the file extension.
///
/// The extension-to-variant mapping lives in `SourceFormat::from_extension`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SourceFormat {
/// Selected for the `.xlsx`, `.xlsm` and `.xlam` extensions.
Xlsx,
/// Selected for the `.xls` and `.xla` extensions.
Xls,
/// Selected for the `.xlsb` extension.
Xlsb,
/// Selected for the `.ods` extension.
Ods,
/// Selected for the `.csv`, `.tsv` and `.txt` extensions.
Csv,
}
impl SourceFormat {
fn from_extension(path: &Path) -> Result<Self> {
let ext = path
.extension()
.and_then(|e| e.to_str())
.map(|s| s.to_ascii_lowercase());
match ext.as_deref() {
Some("xlsx" | "xlsm" | "xlam") => Ok(SourceFormat::Xlsx),
Some("xls" | "xla") => Ok(SourceFormat::Xls),
Some("xlsb") => Ok(SourceFormat::Xlsb),
Some("ods") => Ok(SourceFormat::Ods),
Some("csv" | "tsv" | "txt") => Ok(SourceFormat::Csv),
Some(other) => Err(Error::Format(format!(
"unsupported file extension: .{other} (supported: xlsx, xlsm, xlam, xls, xla, xlsb, ods, csv, tsv, txt)"
))),
None => Err(Error::Format(
"cannot detect format: file has no extension".to_string(),
)),
}
}
}
/// Reader implementation backing an open `Workbook`.
pub(crate) enum Backend {
/// Reader used for the Xlsx, Xls, Xlsb and Ods source formats.
Sheets(SheetsReader),
/// Reader used for the Csv source format.
Csv(CsvBackend),
}
/// Style information read directly from the zip container of an xlsx workbook.
///
/// Workbook-level data is parsed once in `WorkbookStyles::load`; per-sheet
/// cell style ids are parsed on demand and cached.
pub struct WorkbookStyles {
// Entries parsed from `xl/styles.xml` by `parse_cellxfs`; indexed by style id.
// Empty when the archive has no `xl/styles.xml`.
cell_xfs: Vec<XfEntry>,
// Number formats parsed from `xl/styles.xml` by `parse_num_fmts`.
// Empty when the archive has no `xl/styles.xml`.
num_fmts: HashMap<u32, String>,
// Sheet name -> path of that sheet's XML part inside the zip archive.
sheet_xml_paths: HashMap<String, String>,
// Cache: sheet name -> style-id map produced by `parse_cell_style_ids`.
// NOTE(review): the `(u32, u32)` key is presumably a cell coordinate pair — confirm
// the ordering against `parse_cell_style_ids`.
per_sheet_style_ids: HashMap<String, HashMap<(u32, u32), u32>>,
// Path of the workbook file; the archive is reopened from here for lazy per-sheet loads.
zip_path: PathBuf,
}
impl WorkbookStyles {
    /// Reads the workbook-level style data from the xlsx archive at `zip_path`.
    ///
    /// `xl/styles.xml` is optional: when it is absent both the xf table and
    /// the number-format table are left empty. `xl/workbook.xml` and
    /// `xl/_rels/workbook.xml.rels` are read to map each sheet name to the
    /// path of its XML part; sheets whose relationship id has no target are
    /// left out of that mapping. No per-sheet data is parsed here.
    ///
    /// # Errors
    ///
    /// Fails when the file cannot be opened, is not a readable zip archive,
    /// or when reading or parsing any of the parts above fails.
    fn load(zip_path: &Path) -> Result<Self> {
        let mut archive = ZipArchive::new(File::open(zip_path)?)
            .map_err(|e| Error::Xlsx(format!("failed to open xlsx zip: {e}")))?;

        // Both style tables come from the same optional part.
        let styles_xml = zip_read_to_string_opt(&mut archive, "xl/styles.xml")?;
        let (cell_xfs, num_fmts) = match styles_xml.as_deref() {
            Some(xml) => (parse_cellxfs(xml), parse_num_fmts(xml)?),
            None => (Vec::new(), HashMap::new()),
        };

        let workbook_xml = zip_read_to_string(&mut archive, "xl/workbook.xml")?;
        let rels_xml = zip_read_to_string(&mut archive, "xl/_rels/workbook.xml.rels")?;
        let sheet_rids = parse_workbook_sheet_rids(&workbook_xml)?;
        let rel_targets = parse_relationship_targets(&rels_xml)?;

        // Resolve sheet name -> relationship id -> part path under "xl/".
        let sheet_xml_paths: HashMap<String, String> = sheet_rids
            .into_iter()
            .filter_map(|(name, rid)| {
                rel_targets
                    .get(&rid)
                    .map(|target| (name, join_and_normalize("xl/", target)))
            })
            .collect();

        Ok(Self {
            cell_xfs,
            num_fmts,
            sheet_xml_paths,
            per_sheet_style_ids: HashMap::new(),
            zip_path: zip_path.to_path_buf(),
        })
    }

    /// Returns the number-format code for `style_id`, if it has a meaningful one.
    ///
    /// Yields `None` when `style_id` is 0, when it is out of range of the xf
    /// table, when the format id cannot be resolved, or when the resolved
    /// code is blank or equals `General` (ASCII case-insensitive).
    pub fn number_format_for_style_id(&self, style_id: u32) -> Option<&str> {
        let index = match style_id {
            0 => return None,
            id => id as usize,
        };
        let entry = self.cell_xfs.get(index)?;
        let code = crate::styles::resolve_num_fmt(entry.num_fmt_id, &self.num_fmts)?;
        let carries_no_format = code.trim().is_empty() || code.eq_ignore_ascii_case("General");
        if carries_no_format {
            return None;
        }
        Some(code)
    }

    /// Returns the cached style-id map for `sheet_name`, or `None` when it has
    /// not been loaded yet. Never touches the archive.
    pub fn sheet_style_ids(&self, sheet_name: &str) -> Option<&HashMap<(u32, u32), u32>> {
        self.per_sheet_style_ids.get(sheet_name)
    }

    /// Returns the style-id map for `sheet_name`, loading and caching it on
    /// first use.
    ///
    /// A sheet with no known XML part, or whose part is missing from the
    /// archive, is cached as an empty map.
    ///
    /// # Errors
    ///
    /// Fails when the archive cannot be reopened or the sheet XML cannot be
    /// read or parsed; nothing is cached in that case.
    pub fn sheet_style_ids_mut(&mut self, sheet_name: &str) -> Result<&HashMap<(u32, u32), u32>> {
        if !self.per_sheet_style_ids.contains_key(sheet_name) {
            let parsed = match self.sheet_xml_paths.get(sheet_name) {
                // Unknown sheet: remember an empty map without opening the archive.
                None => HashMap::new(),
                Some(part) => {
                    let mut archive = ZipArchive::new(File::open(&self.zip_path)?)
                        .map_err(|e| Error::Xlsx(format!("failed to open xlsx zip: {e}")))?;
                    match zip_read_to_string_opt(&mut archive, part)? {
                        Some(xml) => parse_cell_style_ids(&xml)?,
                        None => HashMap::new(),
                    }
                }
            };
            self.per_sheet_style_ids
                .insert(sheet_name.to_string(), parsed);
        }
        // The entry is guaranteed to exist at this point.
        Ok(self.per_sheet_style_ids.get(sheet_name).unwrap())
    }

    /// Test-only view of the parsed xf table.
    #[cfg(test)]
    pub fn cell_xfs(&self) -> &[XfEntry] {
        &self.cell_xfs
    }
}
/// An opened workbook: a reader backend plus the metadata captured when it was opened.
pub struct Workbook {
// Reader that sheets are loaded from.
inner: Backend,
// Sheet names captured from the backend when the workbook was opened.
sheet_names: Vec<String>,
// Path the workbook was opened from.
path: PathBuf,
// Format detected from the extension of `path`.
format: SourceFormat,
// Lazily loaded style data; starts as `None` and is only ever populated for `Xlsx`.
styles: Option<WorkbookStyles>,
}
impl Workbook {
pub fn open(path: impl AsRef<Path>) -> Result<Self> {
let path = path.as_ref().to_path_buf();
let format = SourceFormat::from_extension(&path)?;
match format {
SourceFormat::Xlsx | SourceFormat::Xls | SourceFormat::Xlsb | SourceFormat::Ods => {
let mut inner: SheetsReader = open_workbook_auto(&path)
.map_err(|e| Error::Xlsx(format!("failed to open workbook: {e}")))?;
if let Sheets::Xlsx(ref mut x) = inner {
let _ = x.load_tables();
}
let sheet_names = inner.sheet_names().to_vec();
Ok(Self {
inner: Backend::Sheets(inner),
sheet_names,
path,
format,
styles: None,
})
}
SourceFormat::Csv => {
let backend = CsvBackend::open(&path)?;
let sheet_names = backend.sheet_names();
Ok(Self {
inner: Backend::Csv(backend),
sheet_names,
path,
format,
styles: None,
})
}
}
}
pub fn path(&self) -> &Path {
&self.path
}
pub fn format(&self) -> SourceFormat {
self.format
}
pub fn sheet_names(&self) -> &[String] {
&self.sheet_names
}
pub fn styles(&mut self) -> Result<&mut WorkbookStyles> {
if self.format != SourceFormat::Xlsx {
return Err(Error::Xlsx(format!(
"styles walker only supports xlsx/xlsm/xlam; workbook format is {:?}",
self.format
)));
}
if self.styles.is_none() {
self.styles = Some(WorkbookStyles::load(&self.path)?);
}
Ok(self.styles.as_mut().unwrap())
}
pub fn sheet(&mut self, name: &str) -> Result<Sheet> {
if !self.sheet_names.iter().any(|n| n == name) {
return Err(Error::SheetNotFound(name.to_string()));
}
match &mut self.inner {
Backend::Sheets(sheets) => {
if self.format == SourceFormat::Xlsx && self.styles.is_none() {
self.styles = WorkbookStyles::load(&self.path).ok();
}
Sheet::load(sheets, name, self.styles.as_mut())
}
Backend::Csv(csv) => csv.load_sheet(name),
}
}
pub fn first_sheet(&mut self) -> Result<Sheet> {
let name = self
.sheet_names
.first()
.ok_or_else(|| Error::SheetNotFound("(workbook has no sheets)".to_string()))?
.clone();
self.sheet(&name)
}
pub fn named_ranges(&self) -> Vec<(String, String)> {
match &self.inner {
Backend::Sheets(s) => s.defined_names().to_vec(),
Backend::Csv(_) => Vec::new(),
}
}
pub fn table_names_in_sheet(&self, sheet_name: &str) -> Vec<String> {
match &self.inner {
Backend::Sheets(Sheets::Xlsx(x)) => x
.table_names_in_sheet(sheet_name)
.into_iter()
.cloned()
.collect(),
_ => Vec::new(),
}
}
pub fn map(&mut self) -> Result<WorkbookMap> {
let path = self.path.to_string_lossy().into_owned();
let named_ranges = self.named_ranges();
let names = self.sheet_names.clone();
let mut sheets = Vec::with_capacity(names.len());
for name in &names {
let tables = self.table_names_in_sheet(name);
let sheet = self.sheet(name)?;
let (rows, cols) = sheet.dimensions();
sheets.push(SheetMap {
name: name.clone(),
rows,
cols,
class: classify_sheet(&sheet),
headers: sheet.headers(),
tables,
});
}
Ok(WorkbookMap {
path,
sheets,
named_ranges,
})
}
}