Skip to main content

sheetkit_xml/
shared_strings.rs

1//! Shared Strings XML schema structures.
2//!
3//! Represents `xl/sharedStrings.xml` in the OOXML package.
4
5use serde::{Deserialize, Serialize};
6
7use crate::namespaces;
8
9/// Shared String Table root element (`xl/sharedStrings.xml`).
10#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
11#[serde(rename = "sst")]
12pub struct Sst {
13    #[serde(rename = "@xmlns")]
14    pub xmlns: String,
15
16    /// Total reference count of shared strings in the workbook.
17    #[serde(rename = "@count", skip_serializing_if = "Option::is_none")]
18    pub count: Option<u32>,
19
20    /// Number of unique string entries.
21    #[serde(rename = "@uniqueCount", skip_serializing_if = "Option::is_none")]
22    pub unique_count: Option<u32>,
23
24    /// Shared string items.
25    #[serde(rename = "si", default)]
26    pub items: Vec<Si>,
27}
28
29/// Shared String Item.
30#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
31pub struct Si {
32    /// Plain text content.
33    #[serde(rename = "t", skip_serializing_if = "Option::is_none")]
34    pub t: Option<T>,
35
36    /// Rich text runs (formatted text).
37    #[serde(rename = "r", default)]
38    pub r: Vec<R>,
39}
40
41/// Text element with optional space preservation.
42#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
43pub struct T {
44    #[serde(
45        rename = "@xml:space",
46        alias = "@space",
47        skip_serializing_if = "Option::is_none"
48    )]
49    pub xml_space: Option<String>,
50
51    #[serde(rename = "$value", default)]
52    pub value: String,
53}
54
55/// Rich text run.
56#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
57pub struct R {
58    /// Run properties (formatting).
59    #[serde(rename = "rPr", skip_serializing_if = "Option::is_none")]
60    pub r_pr: Option<RPr>,
61
62    /// Text content.
63    #[serde(rename = "t")]
64    pub t: T,
65}
66
67/// Run properties (text formatting within a rich text run).
68#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
69pub struct RPr {
70    #[serde(rename = "b", skip_serializing_if = "Option::is_none")]
71    pub b: Option<BoolVal>,
72
73    #[serde(rename = "i", skip_serializing_if = "Option::is_none")]
74    pub i: Option<BoolVal>,
75
76    #[serde(rename = "sz", skip_serializing_if = "Option::is_none")]
77    pub sz: Option<FontSize>,
78
79    #[serde(rename = "color", skip_serializing_if = "Option::is_none")]
80    pub color: Option<Color>,
81
82    #[serde(rename = "rFont", skip_serializing_if = "Option::is_none")]
83    pub r_font: Option<FontName>,
84
85    #[serde(rename = "family", skip_serializing_if = "Option::is_none")]
86    pub family: Option<FontFamily>,
87
88    #[serde(rename = "scheme", skip_serializing_if = "Option::is_none")]
89    pub scheme: Option<FontScheme>,
90}
91
92/// Boolean value wrapper.
93#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
94pub struct BoolVal {
95    #[serde(rename = "@val", skip_serializing_if = "Option::is_none")]
96    pub val: Option<bool>,
97}
98
99/// Font size.
100#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
101pub struct FontSize {
102    #[serde(rename = "@val")]
103    pub val: f64,
104}
105
106/// Color.
107#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
108pub struct Color {
109    #[serde(rename = "@rgb", skip_serializing_if = "Option::is_none")]
110    pub rgb: Option<String>,
111
112    #[serde(rename = "@theme", skip_serializing_if = "Option::is_none")]
113    pub theme: Option<u32>,
114
115    #[serde(rename = "@tint", skip_serializing_if = "Option::is_none")]
116    pub tint: Option<f64>,
117}
118
119/// Font name.
120#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
121pub struct FontName {
122    #[serde(rename = "@val")]
123    pub val: String,
124}
125
126/// Font family.
127#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
128pub struct FontFamily {
129    #[serde(rename = "@val")]
130    pub val: u32,
131}
132
133/// Font scheme.
134#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
135pub struct FontScheme {
136    #[serde(rename = "@val")]
137    pub val: String,
138}
139
140impl Default for Sst {
141    fn default() -> Self {
142        Self {
143            xmlns: namespaces::SPREADSHEET_ML.to_string(),
144            count: Some(0),
145            unique_count: Some(0),
146            items: vec![],
147        }
148    }
149}
150
#[cfg(test)]
mod tests {
    use super::*;

    // Caveat: when deserializing through serde, quick-xml strips leading
    // whitespace from text events by default (its internal StartTrimmer), so
    // text such as " text" is read back as "text". Preserving whitespace in
    // production is the job of the higher-level reader, which should use the
    // raw Reader API. The tests below pin the serde round-trip behavior
    // exactly as it is.

    /// Builds a `<t>` element without an `xml:space` attribute.
    fn text(value: &str) -> T {
        T {
            xml_space: None,
            value: value.to_string(),
        }
    }

    /// Builds a plain-text `<si>` item that has no rich text runs.
    fn plain_item(value: &str) -> Si {
        Si {
            t: Some(text(value)),
            r: Vec::new(),
        }
    }

    /// Builds an `<sst>` in the SpreadsheetML namespace with both counters set.
    fn table(count: u32, unique_count: u32, items: Vec<Si>) -> Sst {
        Sst {
            xmlns: namespaces::SPREADSHEET_ML.to_string(),
            count: Some(count),
            unique_count: Some(unique_count),
            items,
        }
    }

    /// The default table uses the SpreadsheetML namespace and is empty.
    #[test]
    fn test_sst_default() {
        let Sst {
            xmlns,
            count,
            unique_count,
            items,
        } = Sst::default();
        assert_eq!(xmlns, namespaces::SPREADSHEET_ML);
        assert_eq!(count, Some(0));
        assert_eq!(unique_count, Some(0));
        assert!(items.is_empty());
    }

    /// Counters, item count and the first item's text survive a round trip.
    #[test]
    fn test_sst_roundtrip() {
        let original = table(3, 2, vec![plain_item("Hello"), plain_item("World")]);

        let serialized = quick_xml::se::to_string(&original).unwrap();
        let restored: Sst = quick_xml::de::from_str(&serialized).unwrap();

        assert_eq!(original.count, restored.count);
        assert_eq!(original.unique_count, restored.unique_count);
        assert_eq!(original.items.len(), restored.items.len());

        let text_before = original.items[0].t.as_ref().unwrap();
        let text_after = restored.items[0].t.as_ref().unwrap();
        assert_eq!(text_before.value, text_after.value);
    }

    /// Plain string values appear in the serialized output.
    #[test]
    fn test_sst_with_plain_strings() {
        let sst = table(2, 2, vec![plain_item("Name"), plain_item("Age")]);

        let serialized = quick_xml::se::to_string(&sst).unwrap();

        assert!(serialized.contains("Name"));
        assert!(serialized.contains("Age"));
    }

    /// A rich text item with two runs round-trips through serde.
    #[test]
    fn test_sst_with_rich_text() {
        let bold_run = R {
            r_pr: Some(RPr {
                b: Some(BoolVal { val: None }),
                i: None,
                sz: Some(FontSize { val: 11.0 }),
                color: None,
                r_font: Some(FontName {
                    val: "Calibri".to_string(),
                }),
                family: None,
                scheme: None,
            }),
            t: text("Bold"),
        };
        let normal_run = R {
            r_pr: None,
            t: text(" Normal"),
        };
        let rich_item = Si {
            t: None,
            r: vec![bold_run, normal_run],
        };
        let sst = table(1, 1, vec![rich_item]);

        let serialized = quick_xml::se::to_string(&sst).unwrap();
        let restored: Sst = quick_xml::de::from_str(&serialized).unwrap();

        assert_eq!(restored.items.len(), 1);
        let item = &restored.items[0];
        assert!(item.t.is_none());
        assert_eq!(item.r.len(), 2);

        let first_props = item.r[0].r_pr.as_ref();
        assert!(first_props.is_some());
        assert!(first_props.unwrap().b.is_some());
        assert_eq!(item.r[0].t.value, "Bold");
        // quick-xml's StartTrimmer drops leading whitespace that follows a
        // start tag, so " Normal" is read back as "Normal". Whitespace
        // preservation has to be handled by the higher-level reader.
        assert_eq!(item.r[1].t.value, "Normal");
    }

    /// A hand-written document with three plain items parses as expected.
    #[test]
    fn test_parse_real_excel_shared_strings() {
        let document = r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<sst xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" count="4" uniqueCount="3">
  <si><t>Name</t></si>
  <si><t>Value</t></si>
  <si><t>Description</t></si>
</sst>"#;

        let sst: Sst = quick_xml::de::from_str(document).unwrap();

        assert_eq!(sst.count, Some(4));
        assert_eq!(sst.unique_count, Some(3));
        assert_eq!(sst.items.len(), 3);

        let values: Vec<&str> = sst
            .items
            .iter()
            .map(|item| item.t.as_ref().unwrap().value.as_str())
            .collect();
        assert_eq!(values, ["Name", "Value", "Description"]);
    }

    /// A hand-written document with one two-run rich text item parses as
    /// expected.
    #[test]
    fn test_parse_real_excel_rich_text_shared_strings() {
        let document = r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<sst xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" count="1" uniqueCount="1">
  <si>
    <r>
      <rPr><b/><sz val="11"/><rFont val="Calibri"/></rPr>
      <t>Bold</t>
    </r>
    <r>
      <t> text</t>
    </r>
  </si>
</sst>"#;

        let sst: Sst = quick_xml::de::from_str(document).unwrap();

        assert_eq!(sst.items.len(), 1);
        let only_item = &sst.items[0];
        assert!(only_item.t.is_none());
        assert_eq!(only_item.r.len(), 2);

        let first_props = only_item.r[0].r_pr.as_ref();
        assert!(first_props.is_some());
        assert!(first_props.unwrap().b.is_some());
        assert_eq!(only_item.r[0].t.value, "Bold");
        // The leading space is removed by quick-xml's StartTrimmer.
        assert_eq!(only_item.r[1].t.value, "text");
    }

    /// The `xml:space` attribute round-trips even though the surrounding
    /// whitespace of the value does not.
    #[test]
    fn test_text_with_space_preservation() {
        let preserved = T {
            xml_space: Some("preserve".to_string()),
            value: "  leading spaces  ".to_string(),
        };

        let serialized = quick_xml::se::to_string(&preserved).unwrap();
        assert!(serialized.contains(r#"xml:space="preserve""#));

        let restored: T = quick_xml::de::from_str(&serialized).unwrap();
        assert_eq!(restored.xml_space.as_deref(), Some("preserve"));
        // quick-xml's StartTrimmer trims the whitespace. The struct keeps the
        // xml:space="preserve" attribute so that it is re-serialized
        // correctly; keeping the whitespace itself requires the higher-level
        // reader to use the raw Reader API.
        assert_eq!(restored.value, "leading spaces");
    }

    /// The default (empty) table round-trips with zero counters.
    #[test]
    fn test_empty_sst_roundtrip() {
        let serialized = quick_xml::se::to_string(&Sst::default()).unwrap();
        let restored: Sst = quick_xml::de::from_str(&serialized).unwrap();

        assert!(restored.items.is_empty());
        assert_eq!(restored.count, Some(0));
        assert_eq!(restored.unique_count, Some(0));
    }

    /// The serialized output contains the expected element names and text.
    #[test]
    fn test_sst_serialize_structure() {
        let sst = table(1, 1, vec![plain_item("test")]);

        let serialized = quick_xml::se::to_string(&sst).unwrap();

        assert!(serialized.contains("<sst"));
        assert!(serialized.contains("<si>"));
        assert!(serialized.contains("<t>"));
        assert!(serialized.contains("test"));
    }
}