1use std::collections::HashMap;
2use std::fs::File;
3use std::path::{Path, PathBuf};
4
5use calamine_styles::{open_workbook_auto, Reader, Sheets};
6use zip::ZipArchive;
7
8use crate::csv_reader::CsvBackend;
9use crate::error::{Error, Result};
10use crate::map::{classify_sheet, SheetMap, WorkbookMap};
11use crate::ooxml::{
12 join_and_normalize, parse_relationship_targets, parse_workbook_sheet_rids, zip_read_to_string,
13 zip_read_to_string_opt,
14};
15use crate::sheet::{Sheet, SheetsReader};
16use crate::styles::{parse_cellxfs, parse_num_fmts, XfEntry};
17use crate::worksheet_xml::parse_cell_style_ids;
18
/// On-disk format of a workbook, as detected from its file extension.
///
/// Extension matching is ASCII case-insensitive.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SourceFormat {
    /// Selected for the `xlsx`, `xlsm` and `xlam` extensions.
    Xlsx,
    /// Selected for the `xls` and `xla` extensions.
    Xls,
    /// Selected for the `xlsb` extension.
    Xlsb,
    /// Selected for the `ods` extension.
    Ods,
    /// Selected for the `csv`, `tsv` and `txt` extensions.
    Csv,
}
30
31impl SourceFormat {
32 fn from_extension(path: &Path) -> Result<Self> {
33 let ext = path
34 .extension()
35 .and_then(|e| e.to_str())
36 .map(|s| s.to_ascii_lowercase());
37 match ext.as_deref() {
38 Some("xlsx" | "xlsm" | "xlam") => Ok(SourceFormat::Xlsx),
39 Some("xls" | "xla") => Ok(SourceFormat::Xls),
40 Some("xlsb") => Ok(SourceFormat::Xlsb),
41 Some("ods") => Ok(SourceFormat::Ods),
42 Some("csv" | "tsv" | "txt") => Ok(SourceFormat::Csv),
43 Some(other) => Err(Error::Format(format!(
44 "unsupported file extension: .{other} (supported: xlsx, xlsm, xlam, xls, xla, xlsb, ods, csv, tsv, txt)"
45 ))),
46 None => Err(Error::Format(
47 "cannot detect format: file has no extension".to_string(),
48 )),
49 }
50 }
51}
52
53pub(crate) enum Backend {
57 Sheets(SheetsReader),
58 Csv(CsvBackend),
59}
60
61pub struct WorkbookStyles {
68 cell_xfs: Vec<XfEntry>,
69 num_fmts: HashMap<u32, String>,
70 sheet_xml_paths: HashMap<String, String>,
71 per_sheet_style_ids: HashMap<String, HashMap<(u32, u32), u32>>,
72 zip_path: PathBuf,
73}
74
75impl WorkbookStyles {
76 fn load(zip_path: &Path) -> Result<Self> {
77 let file = File::open(zip_path)?;
78 let mut zip = ZipArchive::new(file)
79 .map_err(|e| Error::Xlsx(format!("failed to open xlsx zip: {e}")))?;
80
81 let styles_xml = zip_read_to_string_opt(&mut zip, "xl/styles.xml")?;
82 let cell_xfs = match styles_xml.as_deref() {
83 Some(xml) => parse_cellxfs(xml),
84 None => Vec::new(),
85 };
86 let num_fmts = match styles_xml.as_deref() {
87 Some(xml) => parse_num_fmts(xml)?,
88 None => HashMap::new(),
89 };
90
91 let workbook_xml = zip_read_to_string(&mut zip, "xl/workbook.xml")?;
92 let rels_xml = zip_read_to_string(&mut zip, "xl/_rels/workbook.xml.rels")?;
93 let sheet_rids = parse_workbook_sheet_rids(&workbook_xml)?;
94 let rel_targets = parse_relationship_targets(&rels_xml)?;
95 let mut sheet_xml_paths: HashMap<String, String> = HashMap::new();
96 for (name, rid) in sheet_rids {
97 if let Some(target) = rel_targets.get(&rid) {
98 sheet_xml_paths.insert(name, join_and_normalize("xl/", target));
99 }
100 }
101
102 Ok(Self {
103 cell_xfs,
104 num_fmts,
105 sheet_xml_paths,
106 per_sheet_style_ids: HashMap::new(),
107 zip_path: zip_path.to_path_buf(),
108 })
109 }
110
111 pub fn number_format_for_style_id(&self, style_id: u32) -> Option<&str> {
116 if style_id == 0 {
117 return None;
118 }
119 let xf = self.cell_xfs.get(style_id as usize)?;
120 let code = crate::styles::resolve_num_fmt(xf.num_fmt_id, &self.num_fmts)?;
121 if code.trim().is_empty() || code.eq_ignore_ascii_case("General") {
122 None
123 } else {
124 Some(code)
125 }
126 }
127
128 pub fn sheet_style_ids(&self, sheet_name: &str) -> Option<&HashMap<(u32, u32), u32>> {
133 self.per_sheet_style_ids.get(sheet_name)
134 }
135
136 pub fn sheet_style_ids_mut(&mut self, sheet_name: &str) -> Result<&HashMap<(u32, u32), u32>> {
140 if !self.per_sheet_style_ids.contains_key(sheet_name) {
141 let Some(path) = self.sheet_xml_paths.get(sheet_name).cloned() else {
142 self.per_sheet_style_ids
143 .insert(sheet_name.to_string(), HashMap::new());
144 return Ok(self.per_sheet_style_ids.get(sheet_name).unwrap());
145 };
146 let file = File::open(&self.zip_path)?;
147 let mut zip = ZipArchive::new(file)
148 .map_err(|e| Error::Xlsx(format!("failed to open xlsx zip: {e}")))?;
149 let map = match zip_read_to_string_opt(&mut zip, &path)? {
150 Some(xml) => parse_cell_style_ids(&xml)?,
151 None => HashMap::new(),
152 };
153 self.per_sheet_style_ids.insert(sheet_name.to_string(), map);
154 }
155 Ok(self.per_sheet_style_ids.get(sheet_name).unwrap())
156 }
157
158 #[cfg(test)]
160 pub fn cell_xfs(&self) -> &[XfEntry] {
161 &self.cell_xfs
162 }
163}
164
165pub struct Workbook {
166 inner: Backend,
167 sheet_names: Vec<String>,
168 path: PathBuf,
169 format: SourceFormat,
170 styles: Option<WorkbookStyles>,
171}
172
173impl Workbook {
174 pub fn open(path: impl AsRef<Path>) -> Result<Self> {
183 let path = path.as_ref().to_path_buf();
184 let format = SourceFormat::from_extension(&path)?;
185
186 match format {
187 SourceFormat::Xlsx | SourceFormat::Xls | SourceFormat::Xlsb | SourceFormat::Ods => {
188 let mut inner: SheetsReader = open_workbook_auto(&path)
189 .map_err(|e| Error::Xlsx(format!("failed to open workbook: {e}")))?;
190 if let Sheets::Xlsx(ref mut x) = inner {
193 let _ = x.load_tables();
194 }
195 let sheet_names = inner.sheet_names().to_vec();
196 Ok(Self {
197 inner: Backend::Sheets(inner),
198 sheet_names,
199 path,
200 format,
201 styles: None,
202 })
203 }
204 SourceFormat::Csv => {
205 let backend = CsvBackend::open(&path)?;
206 let sheet_names = backend.sheet_names();
207 Ok(Self {
208 inner: Backend::Csv(backend),
209 sheet_names,
210 path,
211 format,
212 styles: None,
213 })
214 }
215 }
216 }
217
218 pub fn path(&self) -> &Path {
219 &self.path
220 }
221
222 pub fn format(&self) -> SourceFormat {
223 self.format
224 }
225
226 pub fn sheet_names(&self) -> &[String] {
227 &self.sheet_names
228 }
229
230 pub fn styles(&mut self) -> Result<&mut WorkbookStyles> {
237 if self.format != SourceFormat::Xlsx {
238 return Err(Error::Xlsx(format!(
239 "styles walker only supports xlsx/xlsm/xlam; workbook format is {:?}",
240 self.format
241 )));
242 }
243 if self.styles.is_none() {
244 self.styles = Some(WorkbookStyles::load(&self.path)?);
245 }
246 Ok(self.styles.as_mut().unwrap())
247 }
248
249 pub fn sheet(&mut self, name: &str) -> Result<Sheet> {
252 if !self.sheet_names.iter().any(|n| n == name) {
253 return Err(Error::SheetNotFound(name.to_string()));
254 }
255 match &mut self.inner {
256 Backend::Sheets(sheets) => {
257 if self.format == SourceFormat::Xlsx && self.styles.is_none() {
261 self.styles = WorkbookStyles::load(&self.path).ok();
262 }
263 Sheet::load(sheets, name, self.styles.as_mut())
264 }
265 Backend::Csv(csv) => csv.load_sheet(name),
266 }
267 }
268
269 pub fn first_sheet(&mut self) -> Result<Sheet> {
271 let name = self
272 .sheet_names
273 .first()
274 .ok_or_else(|| Error::SheetNotFound("(workbook has no sheets)".to_string()))?
275 .clone();
276 self.sheet(&name)
277 }
278
279 pub fn named_ranges(&self) -> Vec<(String, String)> {
282 match &self.inner {
283 Backend::Sheets(s) => s.defined_names().to_vec(),
284 Backend::Csv(_) => Vec::new(),
285 }
286 }
287
288 pub fn table_names_in_sheet(&self, sheet_name: &str) -> Vec<String> {
291 match &self.inner {
292 Backend::Sheets(Sheets::Xlsx(x)) => x
293 .table_names_in_sheet(sheet_name)
294 .into_iter()
295 .cloned()
296 .collect(),
297 _ => Vec::new(),
298 }
299 }
300
301 pub fn map(&mut self) -> Result<WorkbookMap> {
306 let path = self.path.to_string_lossy().into_owned();
307 let named_ranges = self.named_ranges();
308 let names = self.sheet_names.clone();
309 let mut sheets = Vec::with_capacity(names.len());
310 for name in &names {
311 let tables = self.table_names_in_sheet(name);
312 let sheet = self.sheet(name)?;
313 let (rows, cols) = sheet.dimensions();
314 sheets.push(SheetMap {
315 name: name.clone(),
316 rows,
317 cols,
318 class: classify_sheet(&sheet),
319 headers: sheet.headers(),
320 tables,
321 });
322 }
323 Ok(WorkbookMap {
324 path,
325 sheets,
326 named_ranges,
327 })
328 }
329}