Skip to main content

sheetkit_core/workbook/
mod.rs

1//! Workbook file I/O: reading and writing `.xlsx` files.
2//!
3//! An `.xlsx` file is a ZIP archive containing XML parts. This module provides
4//! [`Workbook`] which holds the parsed XML structures in memory and can
5//! serialize them back to a valid `.xlsx` file.
6
7use std::collections::{HashMap, HashSet};
8use std::io::{Read as _, Write as _};
9use std::path::Path;
10
11use serde::Serialize;
12use sheetkit_xml::chart::ChartSpace;
13use sheetkit_xml::comments::Comments;
14use sheetkit_xml::content_types::{
15    mime_types, ContentTypeDefault, ContentTypeOverride, ContentTypes,
16};
17
/// OOXML package flavor of a workbook.
///
/// The flavor is identified by the content type registered for the workbook
/// part in `[Content_Types].xml`, and it selects the content type string
/// emitted for `xl/workbook.xml` when the workbook is saved.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum WorkbookFormat {
    /// Regular spreadsheet (`.xlsx`). This is the default flavor.
    #[default]
    Xlsx,
    /// Spreadsheet with macros enabled (`.xlsm`).
    Xlsm,
    /// Spreadsheet template (`.xltx`).
    Xltx,
    /// Template with macros enabled (`.xltm`).
    Xltm,
    /// Add-in with macros enabled (`.xlam`).
    Xlam,
}
35
36impl WorkbookFormat {
37    /// Infer the format from a workbook content type string found in
38    /// `[Content_Types].xml`.
39    pub fn from_content_type(ct: &str) -> Option<Self> {
40        match ct {
41            mime_types::WORKBOOK => Some(Self::Xlsx),
42            mime_types::WORKBOOK_MACRO => Some(Self::Xlsm),
43            mime_types::WORKBOOK_TEMPLATE => Some(Self::Xltx),
44            mime_types::WORKBOOK_TEMPLATE_MACRO => Some(Self::Xltm),
45            mime_types::WORKBOOK_ADDIN_MACRO => Some(Self::Xlam),
46            _ => None,
47        }
48    }
49
50    /// Infer the format from a file extension (case-insensitive, without the
51    /// leading dot). Returns `None` for unrecognized extensions.
52    pub fn from_extension(ext: &str) -> Option<Self> {
53        match ext.to_ascii_lowercase().as_str() {
54            "xlsx" => Some(Self::Xlsx),
55            "xlsm" => Some(Self::Xlsm),
56            "xltx" => Some(Self::Xltx),
57            "xltm" => Some(Self::Xltm),
58            "xlam" => Some(Self::Xlam),
59            _ => None,
60        }
61    }
62
63    /// Return the OOXML content type string for this format.
64    pub fn content_type(self) -> &'static str {
65        match self {
66            Self::Xlsx => mime_types::WORKBOOK,
67            Self::Xlsm => mime_types::WORKBOOK_MACRO,
68            Self::Xltx => mime_types::WORKBOOK_TEMPLATE,
69            Self::Xltm => mime_types::WORKBOOK_TEMPLATE_MACRO,
70            Self::Xlam => mime_types::WORKBOOK_ADDIN_MACRO,
71        }
72    }
73}
74
75use sheetkit_xml::drawing::{MarkerType, WsDr};
76use sheetkit_xml::relationships::{self, rel_types, Relationship, Relationships};
77use sheetkit_xml::shared_strings::Sst;
78use sheetkit_xml::styles::StyleSheet;
79use sheetkit_xml::workbook::{WorkbookProtection, WorkbookXml};
80use sheetkit_xml::worksheet::{Cell, CellFormula, CellTypeTag, DrawingRef, Row, WorksheetXml};
81use zip::write::SimpleFileOptions;
82use zip::CompressionMethod;
83
84use crate::cell::CellValue;
85use crate::cell_ref_shift::shift_cell_references_in_text;
86use crate::chart::ChartConfig;
87use crate::comment::CommentConfig;
88use crate::conditional::ConditionalFormatRule;
89use crate::error::{Error, Result};
90use crate::image::ImageConfig;
91use crate::pivot::{PivotTableConfig, PivotTableInfo};
92use crate::protection::WorkbookProtectionConfig;
93use crate::sst::SharedStringTable;
94use crate::threaded_comment::{PersonData, PersonInput, ThreadedCommentData, ThreadedCommentInput};
95use crate::utils::cell_ref::{cell_name_to_coordinates, column_name_to_number};
96use crate::utils::constants::MAX_CELL_CHARS;
97use crate::validation::DataValidationConfig;
98use crate::workbook_paths::{
99    default_relationships, relationship_part_path, relative_relationship_target,
100    resolve_relationship_target,
101};
102
103mod cell_ops;
104mod data;
105mod drawing;
106mod features;
107mod io;
108mod open_options;
109mod sheet_ops;
110
111pub use open_options::OpenOptions;
112
/// XML declaration prepended to every XML part in the package.
///
/// Declares XML version 1.0, UTF-8 encoding and `standalone="yes"`. The
/// constant carries no trailing newline.
const XML_DECLARATION: &str = r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>"#;
115
/// In-memory representation of an `.xlsx` workbook.
///
/// Holds the parsed XML parts of the package, plus raw bytes for parts that
/// are preserved without typed parsing. Several `sheet_*` vectors are kept
/// parallel to `worksheets`: element `i` of each one belongs to the sheet at
/// position `i`.
pub struct Workbook {
    /// Package format; determines the content type written for
    /// `xl/workbook.xml` on save.
    format: WorkbookFormat,
    /// Content type registry (presumably the parsed `[Content_Types].xml` part).
    content_types: ContentTypes,
    /// Package-level relationships (presumably `_rels/.rels` — confirm against
    /// the I/O code).
    package_rels: Relationships,
    /// Parsed workbook part; its sheet entries carry the relationship ids used
    /// to locate worksheet parts.
    workbook_xml: WorkbookXml,
    /// Relationships of the workbook part; worksheet targets in here are
    /// resolved relative to `xl/workbook.xml`.
    workbook_rels: Relationships,
    /// Worksheets in order: (sheet name, worksheet XML data).
    worksheets: Vec<(String, WorksheetXml)>,
    /// Workbook style sheet.
    stylesheet: StyleSheet,
    /// Shared string table.
    sst_runtime: SharedStringTable,
    /// Per-sheet comments, parallel to the `worksheets` vector.
    sheet_comments: Vec<Option<Comments>>,
    /// Chart parts: (zip path like "xl/charts/chart1.xml", ChartSpace data).
    charts: Vec<(String, ChartSpace)>,
    /// Chart parts preserved as raw XML when typed parsing is not supported.
    raw_charts: Vec<(String, Vec<u8>)>,
    /// Drawing parts: (zip path like "xl/drawings/drawing1.xml", WsDr data).
    drawings: Vec<(String, WsDr)>,
    /// Image parts: (zip path like "xl/media/image1.png", raw bytes).
    images: Vec<(String, Vec<u8>)>,
    /// Maps sheet index -> drawing index in `drawings`.
    // NOTE(review): `dead_code` is allowed here without a stated reason —
    // confirm whether anything still reads this map.
    #[allow(dead_code)]
    worksheet_drawings: HashMap<usize, usize>,
    /// Per-sheet worksheet relationship files, keyed by sheet index
    /// (presumably the position in `worksheets`).
    worksheet_rels: HashMap<usize, Relationships>,
    /// Per-drawing relationship files: drawing_index -> Relationships.
    drawing_rels: HashMap<usize, Relationships>,
    /// Core document properties (docProps/core.xml).
    core_properties: Option<sheetkit_xml::doc_props::CoreProperties>,
    /// Extended/application properties (docProps/app.xml).
    app_properties: Option<sheetkit_xml::doc_props::ExtendedProperties>,
    /// Custom properties (docProps/custom.xml).
    custom_properties: Option<sheetkit_xml::doc_props::CustomProperties>,
    /// Pivot table parts: (zip path, PivotTableDefinition data).
    pivot_tables: Vec<(String, sheetkit_xml::pivot_table::PivotTableDefinition)>,
    /// Pivot cache definition parts: (zip path, PivotCacheDefinition data).
    pivot_cache_defs: Vec<(String, sheetkit_xml::pivot_cache::PivotCacheDefinition)>,
    /// Pivot cache records parts: (zip path, PivotCacheRecords data).
    pivot_cache_records: Vec<(String, sheetkit_xml::pivot_cache::PivotCacheRecords)>,
    /// Raw theme XML bytes from xl/theme/theme1.xml (preserved for round-trip).
    theme_xml: Option<Vec<u8>>,
    /// Parsed theme colors from the theme XML.
    theme_colors: sheetkit_xml::theme::ThemeColors,
    /// Per-sheet sparkline configurations, parallel to the `worksheets` vector.
    sheet_sparklines: Vec<Vec<crate::sparkline::SparklineConfig>>,
    /// Per-sheet VML drawing bytes (for legacy comment rendering), parallel to `worksheets`.
    /// `None` means no VML part exists for that sheet.
    sheet_vml: Vec<Option<Vec<u8>>>,
    /// ZIP entries not recognized by the parser, preserved for round-trip fidelity.
    /// Each entry is (zip_path, raw_bytes).
    unknown_parts: Vec<(String, Vec<u8>)>,
    /// Raw VBA project binary blob (`xl/vbaProject.bin`), preserved for round-trip
    /// and used for VBA module extraction. `None` for non-macro workbooks.
    vba_blob: Option<Vec<u8>>,
    /// Table parts: (zip path like "xl/tables/table1.xml", TableXml data, sheet_index).
    tables: Vec<(String, sheetkit_xml::table::TableXml, usize)>,
    /// Raw XML bytes for sheets that were not parsed during selective open.
    /// Parallel to `worksheets`. `Some(bytes)` means the sheet was skipped
    /// and the raw bytes should be written directly on save.
    raw_sheet_xml: Vec<Option<Vec<u8>>>,
    /// Slicer definition parts: (zip path, SlicerDefinitions data).
    slicer_defs: Vec<(String, sheetkit_xml::slicer::SlicerDefinitions)>,
    /// Slicer cache definition parts: (zip path, SlicerCacheDefinition data).
    slicer_caches: Vec<(String, sheetkit_xml::slicer::SlicerCacheDefinition)>,
    /// Per-sheet threaded comments (Excel 2019+), parallel to the `worksheets` vector.
    sheet_threaded_comments: Vec<Option<sheetkit_xml::threaded_comment::ThreadedComments>>,
    /// Person list shared across all sheets (for threaded comment authors).
    person_list: sheetkit_xml::threaded_comment::PersonList,
    /// Per-sheet form control configurations, parallel to `worksheets`.
    sheet_form_controls: Vec<Vec<crate::control::FormControlConfig>>,
    /// O(1) sheet name -> index lookup cache. Must be kept in sync with
    /// `worksheets` via [`Workbook::rebuild_sheet_index`].
    sheet_name_index: HashMap<String, usize>,
}
190
191impl Workbook {
192    /// Return the detected or assigned workbook format.
193    pub fn format(&self) -> WorkbookFormat {
194        self.format
195    }
196
197    /// Set the workbook format. This determines the content type written for
198    /// `xl/workbook.xml` on save.
199    pub fn set_format(&mut self, format: WorkbookFormat) {
200        self.format = format;
201    }
202
203    /// Get the 0-based index of a sheet by name. O(1) via HashMap.
204    pub(crate) fn sheet_index(&self, sheet: &str) -> Result<usize> {
205        self.sheet_name_index
206            .get(sheet)
207            .copied()
208            .ok_or_else(|| Error::SheetNotFound {
209                name: sheet.to_string(),
210            })
211    }
212
213    /// Get a mutable reference to the worksheet XML for the named sheet.
214    pub(crate) fn worksheet_mut(&mut self, sheet: &str) -> Result<&mut WorksheetXml> {
215        let idx = self.sheet_index(sheet)?;
216        Ok(&mut self.worksheets[idx].1)
217    }
218
219    /// Get an immutable reference to the worksheet XML for the named sheet.
220    pub(crate) fn worksheet_ref(&self, sheet: &str) -> Result<&WorksheetXml> {
221        let idx = self.sheet_index(sheet)?;
222        Ok(&self.worksheets[idx].1)
223    }
224
225    /// Public immutable reference to a worksheet's XML by sheet name.
226    pub fn worksheet_xml_ref(&self, sheet: &str) -> Result<&WorksheetXml> {
227        self.worksheet_ref(sheet)
228    }
229
230    /// Public immutable reference to the shared string table.
231    pub fn sst_ref(&self) -> &SharedStringTable {
232        &self.sst_runtime
233    }
234
235    /// Rebuild the sheet name -> index lookup after any structural change
236    /// to the worksheets vector.
237    pub(crate) fn rebuild_sheet_index(&mut self) {
238        self.sheet_name_index.clear();
239        for (i, (name, _)) in self.worksheets.iter().enumerate() {
240            self.sheet_name_index.insert(name.clone(), i);
241        }
242    }
243
244    /// Resolve the part path for a sheet index from workbook relationships.
245    /// Falls back to the default `xl/worksheets/sheet{N}.xml` naming.
246    pub(crate) fn sheet_part_path(&self, sheet_idx: usize) -> String {
247        if let Some(sheet_entry) = self.workbook_xml.sheets.sheets.get(sheet_idx) {
248            if let Some(rel) = self
249                .workbook_rels
250                .relationships
251                .iter()
252                .find(|r| r.id == sheet_entry.r_id && r.rel_type == rel_types::WORKSHEET)
253            {
254                return resolve_relationship_target("xl/workbook.xml", &rel.target);
255            }
256        }
257        format!("xl/worksheets/sheet{}.xml", sheet_idx + 1)
258    }
259}