// sheetkit_core/workbook/open_options.rs

/// Selects the strategy used to parse worksheets and auxiliary parts while a
/// workbook is being opened.
///
/// The default is [`ReadMode::Lazy`].
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum ReadMode {
    /// Eager parsing; the counterpart of the former `Full` mode.
    ///
    /// Note: `OpenOptions::skip_aux_parts` still reports auxiliary parts as
    /// skipped in this mode unless `AuxParts::EagerLoad` is selected as well.
    Eager,
    /// Default mode. Auxiliary parts (comments, drawings, charts, images,
    /// doc props, pivot tables, slicers, threaded comments, VBA, tables,
    /// form controls) are not parsed during open; they are kept as raw bytes
    /// so they can be parsed on demand or round-tripped unchanged.
    ///
    /// The counterpart of the former `ReadFast` mode. Later workstreams are
    /// expected to turn this into true lazy on-demand hydration.
    #[default]
    Lazy,
    /// Forward-only streaming reads. Reserved for future use; for now it
    /// behaves the same as `Lazy`.
    Stream,
}
18
/// Policy deciding when auxiliary parts (comments, charts, images, etc.) get
/// parsed.
///
/// Per `OpenOptions::skip_aux_parts`, this policy only changes behavior when
/// the read mode is `ReadMode::Eager`; the `Lazy` and `Stream` modes skip
/// auxiliary parts regardless of it.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum AuxParts {
    /// Default policy: an auxiliary part is parsed only once it is accessed.
    #[default]
    Deferred,
    /// Every auxiliary part is parsed while the workbook is being opened.
    EagerLoad,
}
28
/// Policy for returning number cells that carry a date-like number format.
///
/// In OOXML a cell's value type (`t="n"`, `t="d"`, ...) is separate from the
/// number format applied to the cell. Microsoft Excel itself almost never
/// writes `t="d"`; it stores dates as `t="n"` cells with a date number format
/// attached. This policy lets the caller choose between taking the `t`
/// attribute literally and promoting such number cells to
/// [`CellValue::Date`](crate::cell::CellValue::Date).
///
/// The default is [`DateInterpretation::NumFmt`].
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum DateInterpretation {
    /// Spec-literal behavior: the cell's `t` attribute decides. A `t="n"`
    /// cell comes back as `CellValue::Number` even if its number format is a
    /// date format, and only `t="d"` cells become `CellValue::Date`. Opt in
    /// when the raw cell type should be the source of truth.
    CellType,
    /// Default behavior, matching how Microsoft Excel stores dates in
    /// practice: a `t="n"` (or untyped) cell is promoted to
    /// `CellValue::Date` when its style references a built-in date number
    /// format (IDs 14-22, 45-47) or a custom format code containing
    /// date/time tokens (y, m, d, h, s).
    #[default]
    NumFmt,
}
54
55/// Options for controlling how a workbook is opened and parsed.
56///
57/// All fields default to `None` (no limit). Read mode defaults to `Lazy`
58/// and auxiliary parts default to `Deferred`.
59/// Use the builder-style setter methods for convenience.
60#[derive(Debug, Clone, Default)]
61pub struct OpenOptions {
62    /// Maximum number of rows to read per sheet. Rows beyond this limit
63    /// are silently discarded during parsing.
64    pub sheet_rows: Option<u32>,
65
66    /// Only parse sheets whose names are in this list. Sheets not listed
67    /// are represented as empty worksheets (their XML is not parsed).
68    /// `None` means parse all sheets.
69    pub sheets: Option<Vec<String>>,
70
71    /// Maximum total decompressed size of all ZIP entries in bytes.
72    /// Exceeding this limit returns [`Error::ZipSizeExceeded`].
73    /// Default when `None`: no limit.
74    pub max_unzip_size: Option<u64>,
75
76    /// Maximum number of ZIP entries allowed.
77    /// Exceeding this limit returns [`Error::ZipEntryCountExceeded`].
78    /// Default when `None`: no limit.
79    pub max_zip_entries: Option<usize>,
80
81    /// Read mode: `Lazy` (default) skips auxiliary parts for faster
82    /// read-only workloads; `Eager` parses everything; `Stream` is
83    /// reserved for future streaming reads.
84    pub read_mode: ReadMode,
85
86    /// Controls when auxiliary parts are parsed.
87    pub aux_parts: AuxParts,
88
89    /// Controls how date-formatted number cells are returned. Defaults to
90    /// [`DateInterpretation::NumFmt`], which matches how Excel authors
91    /// files in practice. Use [`DateInterpretation::CellType`] when you
92    /// want spec-literal behavior.
93    pub date_interpretation: DateInterpretation,
94}
95
96impl OpenOptions {
97    /// Create a new `OpenOptions` with defaults (no limits, lazy read, deferred aux parts).
98    pub fn new() -> Self {
99        Self::default()
100    }
101
102    /// Set the maximum number of rows to read per sheet.
103    pub fn sheet_rows(mut self, rows: u32) -> Self {
104        self.sheet_rows = Some(rows);
105        self
106    }
107
108    /// Only parse sheets whose names are in this list.
109    pub fn sheets(mut self, names: Vec<String>) -> Self {
110        self.sheets = Some(names);
111        self
112    }
113
114    /// Set the maximum total decompressed size in bytes.
115    pub fn max_unzip_size(mut self, size: u64) -> Self {
116        self.max_unzip_size = Some(size);
117        self
118    }
119
120    /// Set the maximum number of ZIP entries.
121    pub fn max_zip_entries(mut self, count: usize) -> Self {
122        self.max_zip_entries = Some(count);
123        self
124    }
125
126    /// Set the read mode. `Lazy` skips auxiliary parts for faster
127    /// read-only workloads. `Stream` is reserved for future use.
128    pub fn read_mode(mut self, mode: ReadMode) -> Self {
129        self.read_mode = mode;
130        self
131    }
132
133    /// Set the auxiliary parts parsing policy.
134    pub fn aux_parts(mut self, policy: AuxParts) -> Self {
135        self.aux_parts = policy;
136        self
137    }
138
139    /// Set the date interpretation policy. When set to
140    /// [`DateInterpretation::NumFmt`], number cells that carry a date
141    /// number format are returned as `CellValue::Date` instead of
142    /// `CellValue::Number`.
143    pub fn date_interpretation(mut self, interpretation: DateInterpretation) -> Self {
144        self.date_interpretation = interpretation;
145        self
146    }
147
148    /// Returns true when auxiliary parts should be skipped during open.
149    /// Lazy/Stream modes always skip. Eager mode respects `aux_parts`.
150    pub(crate) fn skip_aux_parts(&self) -> bool {
151        match self.read_mode {
152            ReadMode::Eager => self.aux_parts == AuxParts::Deferred,
153            ReadMode::Lazy | ReadMode::Stream => true,
154        }
155    }
156
157    /// Returns true when mode is `Eager`.
158    #[allow(dead_code)]
159    pub(crate) fn is_eager(&self) -> bool {
160        self.read_mode == ReadMode::Eager
161    }
162
163    /// Returns true when mode is `Lazy`.
164    #[allow(dead_code)]
165    pub(crate) fn is_lazy(&self) -> bool {
166        self.read_mode == ReadMode::Lazy
167    }
168
169    /// Returns true when mode is `Stream`.
170    #[allow(dead_code)]
171    pub(crate) fn is_stream(&self) -> bool {
172        self.read_mode == ReadMode::Stream
173    }
174
175    /// Check whether a given sheet name should be parsed based on the `sheets` filter.
176    pub(crate) fn should_parse_sheet(&self, name: &str) -> bool {
177        match &self.sheets {
178            None => true,
179            Some(names) => names.iter().any(|n| n == name),
180        }
181    }
182}
183
// Unit tests for `OpenOptions` and its policy enums. Builder tests set a
// non-default value so that a setter which silently does nothing would fail.
#[cfg(test)]
mod tests {
    use super::*;

    /// Every `ReadMode` variant, for table-driven checks.
    const ALL_MODES: [ReadMode; 3] = [ReadMode::Eager, ReadMode::Lazy, ReadMode::Stream];

    #[test]
    fn test_default_options() {
        let opts = OpenOptions::default();
        assert_eq!(opts.sheet_rows, None);
        assert_eq!(opts.sheets, None);
        assert_eq!(opts.max_unzip_size, None);
        assert_eq!(opts.max_zip_entries, None);
        assert_eq!(opts.read_mode, ReadMode::Lazy);
        assert!(opts.skip_aux_parts());
        assert_eq!(opts.date_interpretation, DateInterpretation::NumFmt);
    }

    #[test]
    fn test_date_interpretation_default_is_num_fmt() {
        assert_eq!(DateInterpretation::default(), DateInterpretation::NumFmt);
    }

    #[test]
    fn test_date_interpretation_builder() {
        // Start with the non-default value, then switch back.
        let opts = OpenOptions::new().date_interpretation(DateInterpretation::CellType);
        assert_eq!(opts.date_interpretation, DateInterpretation::CellType);

        let opts = opts.date_interpretation(DateInterpretation::NumFmt);
        assert_eq!(opts.date_interpretation, DateInterpretation::NumFmt);
    }

    #[test]
    fn test_builder_methods() {
        let sheet_names = vec!["Sheet1".to_string()];
        let opts = OpenOptions::new()
            .sheet_rows(100)
            .sheets(sheet_names.clone())
            .max_unzip_size(1_000_000)
            .max_zip_entries(500);
        assert_eq!(opts.sheet_rows, Some(100));
        assert_eq!(opts.sheets, Some(sheet_names));
        assert_eq!(opts.max_unzip_size, Some(1_000_000));
        assert_eq!(opts.max_zip_entries, Some(500));
    }

    #[test]
    fn test_read_mode_builder() {
        let opts = OpenOptions::new().read_mode(ReadMode::Lazy);
        assert_eq!(opts.read_mode, ReadMode::Lazy);
        assert!(opts.skip_aux_parts());

        // `Lazy` is the default, so also set a non-default mode.
        let opts = opts.read_mode(ReadMode::Eager);
        assert_eq!(opts.read_mode, ReadMode::Eager);
    }

    #[test]
    fn test_read_mode_default_is_lazy() {
        assert_eq!(ReadMode::default(), ReadMode::Lazy);
    }

    #[test]
    fn test_read_mode_combined_with_other_options() {
        let opts = OpenOptions::new().sheet_rows(50).read_mode(ReadMode::Lazy);
        assert_eq!(opts.sheet_rows, Some(50));
        assert!(opts.skip_aux_parts());
    }

    #[test]
    fn test_stream_mode_skips_aux_parts() {
        let opts = OpenOptions::new().read_mode(ReadMode::Stream);
        assert!(opts.skip_aux_parts());
        assert!(opts.is_stream());
        assert!(!opts.is_eager());
        assert!(!opts.is_lazy());
    }

    #[test]
    fn test_aux_parts_default_is_deferred() {
        assert_eq!(OpenOptions::default().aux_parts, AuxParts::Deferred);
        assert_eq!(AuxParts::default(), AuxParts::Deferred);
    }

    #[test]
    fn test_aux_parts_deferred() {
        let opts = OpenOptions::new().aux_parts(AuxParts::Deferred);
        assert_eq!(opts.aux_parts, AuxParts::Deferred);
    }

    #[test]
    fn test_aux_parts_eager_load_builder() {
        // `Deferred` is the default, so only a non-default value proves the
        // setter stores its argument.
        let opts = OpenOptions::new().aux_parts(AuxParts::EagerLoad);
        assert_eq!(opts.aux_parts, AuxParts::EagerLoad);
    }

    #[test]
    fn test_eager_mode_with_deferred_aux_skips_aux() {
        let opts = OpenOptions::new()
            .read_mode(ReadMode::Eager)
            .aux_parts(AuxParts::Deferred);
        assert!(opts.skip_aux_parts());
    }

    #[test]
    fn test_eager_mode_with_eager_aux_parses_all() {
        let opts = OpenOptions::new()
            .read_mode(ReadMode::Eager)
            .aux_parts(AuxParts::EagerLoad);
        assert!(!opts.skip_aux_parts());
    }

    #[test]
    fn test_lazy_and_stream_ignore_eager_load_aux() {
        // Lazy and Stream skip auxiliary parts whatever the aux policy is.
        for mode in [ReadMode::Lazy, ReadMode::Stream].iter().copied() {
            let opts = OpenOptions::new()
                .read_mode(mode)
                .aux_parts(AuxParts::EagerLoad);
            assert!(opts.skip_aux_parts(), "{:?} must skip aux parts", mode);
        }
    }

    #[test]
    fn test_should_parse_sheet_no_filter() {
        let opts = OpenOptions::default();
        for name in ["Sheet1", "anything"].iter() {
            assert!(opts.should_parse_sheet(name));
        }
    }

    #[test]
    fn test_should_parse_sheet_with_filter() {
        let opts = OpenOptions::new().sheets(vec!["Sales".to_string(), "Data".to_string()]);
        assert!(opts.should_parse_sheet("Sales"));
        assert!(opts.should_parse_sheet("Data"));
        assert!(!opts.should_parse_sheet("Sheet1"));
        assert!(!opts.should_parse_sheet("Other"));
    }

    #[test]
    fn test_should_parse_sheet_empty_filter() {
        // An empty allow-list is still a filter: nothing matches it.
        let opts = OpenOptions::new().sheets(Vec::new());
        assert!(!opts.should_parse_sheet("Sheet1"));
    }

    #[test]
    fn test_helper_methods() {
        // Each predicate must be true for exactly its own mode.
        for mode in ALL_MODES.iter().copied() {
            let opts = OpenOptions::new().read_mode(mode);
            assert_eq!(opts.is_eager(), mode == ReadMode::Eager);
            assert_eq!(opts.is_lazy(), mode == ReadMode::Lazy);
            assert_eq!(opts.is_stream(), mode == ReadMode::Stream);
        }
    }
}