Skip to main content

wolfxl_core/
styles.rs

1//! Pure-Rust `xl/styles.xml` reader — cellXfs + numFmts.
2//!
3//! `calamine-styles` resolves number formats for us when it can (real Excel
4//! workbooks expose them via `Style::get_number_format`), but for workbooks
5//! authored by openpyxl the style information is sometimes missing from the
6//! surface calamine exposes. This module lets us walk the raw styles.xml
7//! ourselves: read the cellXfs table of style entries, read any custom
8//! numFmts the workbook defines, and resolve a style id → format code.
9//!
10//! Paired with [`crate::worksheet_xml::parse_cell_style_ids`] (which builds
11//! the per-cell `(row, col) → styleId` map), this gives a complete fallback
12//! path for the openpyxl fixture gap.
13
14use std::collections::HashMap;
15
16use quick_xml::events::{BytesStart, Event};
17use quick_xml::Reader as XmlReader;
18
19use crate::error::{Error, Result};
20use crate::ooxml::attr_value;
21
/// One `<xf>` record taken from the `<cellXfs>` table of `xl/styles.xml`.
///
/// A cell's `s` attribute is an index into that table. The `num_fmt_id`
/// held here is then looked up — in the workbook's custom `numFmts` or in
/// the built-in list — to obtain the format-code string.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct XfEntry {
    /// Value of the `numFmtId` attribute.
    pub num_fmt_id: u32,
    /// Value of the `fontId` attribute.
    pub font_id: u32,
    /// Value of the `fillId` attribute.
    pub fill_id: u32,
    /// Value of the `borderId` attribute.
    pub border_id: u32,
}
32
33/// Parse the `<cellXfs>` section of styles.xml into an ordered list of
34/// [`XfEntry`]s. The ordinal position matches the `s="N"` attribute on cells.
35pub fn parse_cellxfs(xml: &str) -> Vec<XfEntry> {
36    let mut reader = XmlReader::from_str(xml);
37    reader.config_mut().trim_text(true);
38    let mut buf: Vec<u8> = Vec::new();
39    let mut in_cellxfs = false;
40    let mut entries: Vec<XfEntry> = Vec::new();
41
42    loop {
43        match reader.read_event_into(&mut buf) {
44            Ok(Event::Start(ref e)) => {
45                let tag = e.local_name();
46                if tag.as_ref() == b"cellXfs" {
47                    in_cellxfs = true;
48                } else if tag.as_ref() == b"xf" && in_cellxfs {
49                    entries.push(parse_xf_entry(e));
50                }
51            }
52            Ok(Event::Empty(ref e)) => {
53                if e.local_name().as_ref() == b"xf" && in_cellxfs {
54                    entries.push(parse_xf_entry(e));
55                }
56            }
57            Ok(Event::End(ref e)) => {
58                if e.local_name().as_ref() == b"cellXfs" {
59                    in_cellxfs = false;
60                }
61            }
62            Ok(Event::Eof) => break,
63            Err(_) => break,
64            _ => {}
65        }
66        buf.clear();
67    }
68
69    entries
70}
71
72fn parse_xf_entry(e: &BytesStart<'_>) -> XfEntry {
73    let num_fmt_id = attr_value(e, b"numFmtId")
74        .and_then(|s| s.parse().ok())
75        .unwrap_or(0);
76    let font_id = attr_value(e, b"fontId")
77        .and_then(|s| s.parse().ok())
78        .unwrap_or(0);
79    let fill_id = attr_value(e, b"fillId")
80        .and_then(|s| s.parse().ok())
81        .unwrap_or(0);
82    let border_id = attr_value(e, b"borderId")
83        .and_then(|s| s.parse().ok())
84        .unwrap_or(0);
85
86    XfEntry {
87        num_fmt_id,
88        font_id,
89        fill_id,
90        border_id,
91    }
92}
93
94/// Parse the `<numFmts>` section into `numFmtId → formatCode`. Custom
95/// formats always live here; built-in ones (IDs < 164) are resolved via
96/// [`builtin_num_fmt`] instead.
97pub fn parse_num_fmts(xml: &str) -> Result<HashMap<u32, String>> {
98    let mut reader = XmlReader::from_str(xml);
99    reader.config_mut().trim_text(true);
100    let mut buf: Vec<u8> = Vec::new();
101
102    let mut in_numfmts = false;
103    let mut formats: HashMap<u32, String> = HashMap::new();
104
105    loop {
106        match reader.read_event_into(&mut buf) {
107            Ok(Event::Start(e)) => {
108                if e.local_name().as_ref() == b"numFmts" {
109                    in_numfmts = true;
110                } else if in_numfmts && e.local_name().as_ref() == b"numFmt" {
111                    capture_num_fmt(&e, &mut formats);
112                }
113            }
114            Ok(Event::Empty(e)) => {
115                if in_numfmts && e.local_name().as_ref() == b"numFmt" {
116                    capture_num_fmt(&e, &mut formats);
117                }
118            }
119            Ok(Event::End(e)) => {
120                if e.local_name().as_ref() == b"numFmts" {
121                    in_numfmts = false;
122                }
123            }
124            Ok(Event::Eof) => break,
125            Err(e) => return Err(Error::Xlsx(format!("failed to parse styles.xml: {e}"))),
126            _ => {}
127        }
128        buf.clear();
129    }
130
131    Ok(formats)
132}
133
134fn capture_num_fmt(e: &BytesStart<'_>, out: &mut HashMap<u32, String>) {
135    let id = attr_value(e, b"numFmtId").and_then(|s| s.parse::<u32>().ok());
136    let code = attr_value(e, b"formatCode");
137    if let (Some(id), Some(code)) = (id, code) {
138        out.insert(id, code);
139    }
140}
141
/// Built-in number-format codes, as `(numFmtId, formatCode)` pairs in
/// ascending ID order.
///
/// IDs 0..163 are reserved for built-ins; 164 and above are always custom
/// and are declared in `<numFmts>`. Only IDs that carry a built-in meaning
/// appear here, so the sequence has gaps (for example 23–36).
///
/// The table mirrors openpyxl's `openpyxl.styles.numbers.BUILTIN_FORMATS`
/// so that the resolved string matches what a host tool would display.
///
/// NOTE(review): entry 44 has no `;` between its sections, unlike entries
/// 41–43. Presumably that is carried over verbatim from the openpyxl table;
/// confirm before "fixing" it — the unit tests pin this exact string.
pub const BUILTIN_NUM_FMTS: &[(u32, &str)] = &[
    // General and plain numeric formats.
    (0, "General"),
    (1, "0"),
    (2, "0.00"),
    (3, "#,##0"),
    (4, "#,##0.00"),
    // Currency (two-hash raw strings because the codes contain `"#`).
    (5, r##""$"#,##0_);("$"#,##0)"##),
    (6, r##""$"#,##0_);[Red]("$"#,##0)"##),
    (7, r##""$"#,##0.00_);("$"#,##0.00)"##),
    (8, r##""$"#,##0.00_);[Red]("$"#,##0.00)"##),
    // Percent, scientific, fractions.
    (9, "0%"),
    (10, "0.00%"),
    (11, "0.00E+00"),
    (12, "# ?/?"),
    (13, "# ??/??"),
    // Dates and times.
    (14, "mm-dd-yy"),
    (15, "d-mmm-yy"),
    (16, "d-mmm"),
    (17, "mmm-yy"),
    (18, "h:mm AM/PM"),
    (19, "h:mm:ss AM/PM"),
    (20, "h:mm"),
    (21, "h:mm:ss"),
    (22, "m/d/yy h:mm"),
    // Parenthesised negatives.
    (37, "#,##0_);(#,##0)"),
    (38, "#,##0_);[Red](#,##0)"),
    (39, "#,##0.00_);(#,##0.00)"),
    (40, "#,##0.00_);[Red](#,##0.00)"),
    // Accounting-style layouts.
    (41, r#"_(* #,##0_);_(* \(#,##0\);_(* "-"_);_(@_)"#),
    (42, r#"_("$"* #,##0_);_("$"* \(#,##0\);_("$"* "-"_);_(@_)"#),
    (43, r#"_(* #,##0.00_);_(* \(#,##0.00\);_(* "-"??_);_(@_)"#),
    (44, r#"_("$"* #,##0.00_)_("$"* \(#,##0.00\)_("$"* "-"??_)_(@_)"#),
    // Durations, scientific, text.
    (45, "mm:ss"),
    (46, "[h]:mm:ss"),
    (47, "mmss.0"),
    (48, "##0.0E+0"),
    (49, "@"),
];
189
190/// Resolve a built-in numFmtId to its format-code string, or `None` if the
191/// ID isn't a known built-in. `0 → "General"` is returned as-is; callers who
192/// treat General as "no format" must filter it out themselves.
193pub fn builtin_num_fmt(id: u32) -> Option<&'static str> {
194    BUILTIN_NUM_FMTS
195        .iter()
196        .find_map(|(i, code)| if *i == id { Some(*code) } else { None })
197}
198
199/// Resolve a numFmtId against both the custom table and the built-in list.
200/// Custom entries win on conflict (Excel itself uses the custom value when
201/// an ID that overlaps with a built-in is redefined).
202pub fn resolve_num_fmt<'a>(id: u32, customs: &'a HashMap<u32, String>) -> Option<&'a str> {
203    if let Some(custom) = customs.get(&id) {
204        return Some(custom.as_str());
205    }
206    builtin_num_fmt(id)
207}
208
#[cfg(test)]
mod tests {
    use super::*;

    /// Smallest stylesheet that exercises both parsers: two custom number
    /// formats (164, 165) and three `cellXfs` entries.
    const MINIMAL_STYLES: &str = r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
<numFmts count="2">
  <numFmt numFmtId="164" formatCode="&quot;$&quot;#,##0.00"/>
  <numFmt numFmtId="165" formatCode="0.0%"/>
</numFmts>
<fonts count="1"><font><sz val="11"/><name val="Calibri"/></font></fonts>
<fills count="1"><fill><patternFill patternType="none"/></fill></fills>
<borders count="1"><border><left/><right/><top/><bottom/><diagonal/></border></borders>
<cellXfs count="3">
  <xf numFmtId="0" fontId="0" fillId="0" borderId="0"/>
  <xf numFmtId="164" fontId="0" fillId="0" borderId="0" applyNumberFormat="1"/>
  <xf numFmtId="9" fontId="0" fillId="0" borderId="0" applyNumberFormat="1"/>
</cellXfs>
</styleSheet>"#;

    // The position in the returned list must match the cell `s` index.
    #[test]
    fn parse_cellxfs_returns_entries_in_order() {
        let ids: Vec<u32> = parse_cellxfs(MINIMAL_STYLES)
            .iter()
            .map(|xf| xf.num_fmt_id)
            .collect();
        assert_eq!(ids, [0, 164, 9]);
    }

    // A self-closing `<cellXfs/>` must not open the table, otherwise the
    // `<xf>` under `<cellStyleXfs>` would be collected by mistake.
    #[test]
    fn self_closing_cellxfs_does_not_capture_later_xfs() {
        let xml = r#"
<styleSheet>
  <cellXfs count="0"/>
  <cellStyleXfs count="1"><xf numFmtId="164"/></cellStyleXfs>
</styleSheet>"#;
        assert!(parse_cellxfs(xml).is_empty());
    }

    #[test]
    fn parse_num_fmts_captures_custom_formats() {
        let customs = parse_num_fmts(MINIMAL_STYLES).unwrap();
        let code_for = |id: u32| customs.get(&id).map(String::as_str);
        assert_eq!(code_for(164), Some("\"$\"#,##0.00"));
        assert_eq!(code_for(165), Some("0.0%"));
    }

    #[test]
    fn parse_num_fmts_empty_when_no_section() {
        let xml = r#"<styleSheet><cellXfs count="1"><xf/></cellXfs></styleSheet>"#;
        assert!(parse_num_fmts(xml).unwrap().is_empty());
    }

    #[test]
    fn builtin_num_fmt_covers_common_ids() {
        assert_eq!(builtin_num_fmt(0), Some("General"));
        assert_eq!(builtin_num_fmt(9), Some("0%"));
        assert_eq!(builtin_num_fmt(14), Some("mm-dd-yy"));
        assert_eq!(
            builtin_num_fmt(44),
            Some(r#"_("$"* #,##0.00_)_("$"* \(#,##0.00\)_("$"* "-"??_)_(@_)"#)
        );
        // 163 is inside the reserved range but has no built-in code.
        assert_eq!(builtin_num_fmt(163), None);
    }

    #[test]
    fn resolve_prefers_custom_over_builtin() {
        let customs = HashMap::from([(9, "0.0% (redefined)".to_string())]);
        assert_eq!(resolve_num_fmt(9, &customs), Some("0.0% (redefined)"));
    }

    #[test]
    fn resolve_falls_back_to_builtin() {
        let no_customs = HashMap::new();
        assert_eq!(resolve_num_fmt(14, &no_customs), Some("mm-dd-yy"));
        assert_eq!(resolve_num_fmt(999, &no_customs), None);
    }
}