Skip to main content

sheetkit_core/
sst.rs

1//! Runtime shared string table.
2//!
3//! The [`SharedStringTable`] provides an efficient in-memory index for looking
4//! up and inserting shared strings. It bridges the gap between the XML-level
5//! [`sheetkit_xml::shared_strings::Sst`] and the high-level cell API.
6
7use std::collections::HashMap;
8
9use sheetkit_xml::shared_strings::{Si, Sst, T};
10
11use crate::rich_text::{xml_to_run, RichTextRun};
12
13/// Runtime shared string table for efficient string lookup and insertion.
14///
15/// Maintains both an ordered list of strings (for index-based lookup) and a
16/// reverse hash map (for deduplication when inserting). Original [`Si`] items
17/// loaded from file are preserved so that `to_sst()` can reuse them without
18/// cloning the string data a second time.
19#[derive(Debug)]
20pub struct SharedStringTable {
21    strings: Vec<String>,
22    index_map: HashMap<String, usize>,
23    /// Original or constructed Si items, parallel to `strings`.
24    /// `None` for plain-text items added via `add()` / `add_owned()`.
25    si_items: Vec<Option<Si>>,
26}
27
28impl SharedStringTable {
29    /// Create a new, empty shared string table.
30    pub fn new() -> Self {
31        Self {
32            strings: Vec::new(),
33            index_map: HashMap::new(),
34            si_items: Vec::new(),
35        }
36    }
37
38    /// Build from an XML [`Sst`], taking ownership to avoid cloning items.
39    ///
40    /// Plain-text items use the `t` field directly. Rich-text items
41    /// concatenate all run texts. Pre-sizes internal containers.
42    pub fn from_sst(sst: Sst) -> Self {
43        let cap = sst.items.len();
44        let mut strings = Vec::with_capacity(cap);
45        let mut index_map = HashMap::with_capacity(cap);
46        let mut si_items: Vec<Option<Si>> = Vec::with_capacity(cap);
47
48        for si in sst.items {
49            let text = si_to_string(&si);
50            let idx = strings.len();
51            index_map.entry(text.clone()).or_insert(idx);
52            // Preserve the original Si for rich text or items with xml:space.
53            let is_rich = si.t.is_none() && !si.r.is_empty();
54            let has_space_attr = si.t.as_ref().is_some_and(|t| t.xml_space.is_some());
55            if is_rich || has_space_attr {
56                si_items.push(Some(si));
57            } else {
58                si_items.push(None);
59            }
60            strings.push(text);
61        }
62
63        Self {
64            strings,
65            index_map,
66            si_items,
67        }
68    }
69
70    /// Convert back to an XML [`Sst`] struct for serialization.
71    ///
72    /// Reuses stored [`Si`] items for entries loaded from file. Builds new
73    /// `Si` items only for strings added at runtime.
74    pub fn to_sst(&self) -> Sst {
75        let items: Vec<Si> = self
76            .strings
77            .iter()
78            .enumerate()
79            .map(|(idx, s)| {
80                if let Some(ref si) = self.si_items[idx] {
81                    si.clone()
82                } else {
83                    Si {
84                        t: Some(T {
85                            xml_space: if needs_space_preserve(s) {
86                                Some("preserve".to_string())
87                            } else {
88                                None
89                            },
90                            value: s.clone(),
91                        }),
92                        r: vec![],
93                    }
94                }
95            })
96            .collect();
97
98        let len = items.len() as u32;
99        Sst {
100            xmlns: sheetkit_xml::namespaces::SPREADSHEET_ML.to_string(),
101            count: Some(len),
102            unique_count: Some(len),
103            items,
104        }
105    }
106
107    /// Get a string by its index.
108    pub fn get(&self, index: usize) -> Option<&str> {
109        self.strings.get(index).map(|s| s.as_str())
110    }
111
112    /// Add a string by reference, returning its index.
113    ///
114    /// If the string already exists, the existing index is returned (dedup).
115    pub fn add(&mut self, s: &str) -> usize {
116        if let Some(&idx) = self.index_map.get(s) {
117            return idx;
118        }
119        let idx = self.strings.len();
120        self.strings.push(s.to_string());
121        self.index_map.insert(s.to_string(), idx);
122        self.si_items.push(None);
123        idx
124    }
125
126    /// Add a string by value, returning its index.
127    ///
128    /// Avoids one allocation compared to `add()` when the caller already
129    /// owns a `String`.
130    pub fn add_owned(&mut self, s: String) -> usize {
131        if let Some(&idx) = self.index_map.get(&s) {
132            return idx;
133        }
134        let idx = self.strings.len();
135        self.index_map.insert(s.clone(), idx);
136        self.strings.push(s);
137        self.si_items.push(None);
138        idx
139    }
140
141    /// Add rich text runs, returning the SST index.
142    ///
143    /// The plain-text concatenation of the runs is used for deduplication.
144    pub fn add_rich_text(&mut self, runs: &[RichTextRun]) -> usize {
145        let plain: String = runs.iter().map(|r| r.text.as_str()).collect();
146        if let Some(&idx) = self.index_map.get(&plain) {
147            return idx;
148        }
149        let idx = self.strings.len();
150        self.index_map.insert(plain.clone(), idx);
151        self.strings.push(plain);
152        let si = crate::rich_text::runs_to_si(runs);
153        self.si_items.push(Some(si));
154        idx
155    }
156
157    /// Get rich text runs for an SST entry, if it has formatting.
158    ///
159    /// Returns `None` for plain-text entries.
160    pub fn get_rich_text(&self, index: usize) -> Option<Vec<RichTextRun>> {
161        self.si_items
162            .get(index)
163            .and_then(|opt| opt.as_ref())
164            .filter(|si| !si.r.is_empty())
165            .map(|si| si.r.iter().map(xml_to_run).collect())
166    }
167
168    /// Number of unique strings.
169    pub fn len(&self) -> usize {
170        self.strings.len()
171    }
172
173    /// Returns `true` if the table contains no strings.
174    pub fn is_empty(&self) -> bool {
175        self.strings.is_empty()
176    }
177}
178
179impl Default for SharedStringTable {
180    fn default() -> Self {
181        Self::new()
182    }
183}
184
/// Check whether a string needs `xml:space="preserve"`.
///
/// Returns `true` when the text starts or ends with a space, contains two
/// consecutive spaces, or contains a line feed, carriage return or tab
/// anywhere. Carriage return is included because it is XML white space just
/// like line feed and tab, so a string containing one should get the same
/// treatment.
fn needs_space_preserve(s: &str) -> bool {
    s.starts_with(' ')
        || s.ends_with(' ')
        || s.contains("  ")
        || s.chars().any(|c| matches!(c, '\n' | '\r' | '\t'))
}
193
194/// Extract the plain-text content of a shared string item.
195///
196/// For plain items, returns `si.t.value`. For rich-text items, concatenates
197/// all run texts.
198fn si_to_string(si: &Si) -> String {
199    if let Some(ref t) = si.t {
200        t.value.clone()
201    } else {
202        // Rich text: concatenate all runs.
203        si.r.iter().map(|r| r.t.value.as_str()).collect()
204    }
205}
206
#[cfg(test)]
mod tests {
    use super::*;
    use sheetkit_xml::shared_strings::{Si, Sst, R, T};

    /// Plain `<si><t>…</t></si>` fixture with an optional `xml:space` value.
    fn plain_si(value: &str, xml_space: Option<&str>) -> Si {
        Si {
            t: Some(T {
                xml_space: xml_space.map(str::to_string),
                value: value.to_string(),
            }),
            r: vec![],
        }
    }

    /// Run fixture without run properties or `xml:space`.
    fn bare_run(value: &str) -> R {
        R {
            r_pr: None,
            t: T {
                xml_space: None,
                value: value.to_string(),
            },
        }
    }

    /// Wrap items in an `Sst` whose counts equal the number of items.
    fn sst_of(items: Vec<Si>) -> Sst {
        let total = Some(items.len() as u32);
        Sst {
            xmlns: sheetkit_xml::namespaces::SPREADSHEET_ML.to_string(),
            count: total,
            unique_count: total,
            items,
        }
    }

    /// The two-run "Bold" + " Normal" rich-text table used by several tests.
    fn bold_normal_sst() -> Sst {
        sst_of(vec![Si {
            t: None,
            r: vec![bare_run("Bold"), bare_run(" Normal")],
        }])
    }

    /// High-level run fixture; only the text and the bold flag vary.
    fn text_run(text: &str, bold: bool) -> RichTextRun {
        RichTextRun {
            text: text.to_string(),
            font: None,
            size: None,
            bold,
            italic: false,
            color: None,
        }
    }

    #[test]
    fn test_sst_new_is_empty() {
        let table = SharedStringTable::new();
        assert!(table.is_empty());
        assert_eq!(table.len(), 0);
    }

    #[test]
    fn test_sst_add_returns_index() {
        let mut table = SharedStringTable::new();
        assert_eq!(table.add("hello"), 0);
        assert_eq!(table.add("world"), 1);
        assert_eq!(table.add("foo"), 2);
        assert_eq!(table.len(), 3);
    }

    #[test]
    fn test_sst_add_deduplicates() {
        let mut table = SharedStringTable::new();
        assert_eq!(table.add("hello"), 0);
        assert_eq!(table.add("world"), 1);
        // A repeated string resolves to the index it was first given.
        assert_eq!(table.add("hello"), 0);
        // Only the two distinct strings are stored.
        assert_eq!(table.len(), 2);
    }

    #[test]
    fn test_sst_add_owned() {
        let mut table = SharedStringTable::new();
        assert_eq!(table.add_owned("hello".to_string()), 0);
        assert_eq!(table.add_owned("world".to_string()), 1);
        // Deduplication applies to owned insertion as well.
        assert_eq!(table.add_owned("hello".to_string()), 0);
        assert_eq!(table.len(), 2);
        assert_eq!(table.get(0), Some("hello"));
        assert_eq!(table.get(1), Some("world"));
    }

    #[test]
    fn test_sst_get() {
        let mut table = SharedStringTable::new();
        table.add("alpha");
        table.add("beta");

        assert_eq!(table.get(0), Some("alpha"));
        assert_eq!(table.get(1), Some("beta"));
        assert_eq!(table.get(2), None);
    }

    #[test]
    fn test_sst_from_xml_and_back() {
        const EXPECTED: [&str; 3] = ["Name", "Age", "City"];
        let xml_sst = sst_of(EXPECTED.iter().map(|text| plain_si(text, None)).collect());

        let table = SharedStringTable::from_sst(xml_sst);
        assert_eq!(table.len(), 3);
        for (idx, expected) in EXPECTED.iter().enumerate() {
            assert_eq!(table.get(idx), Some(*expected));
        }

        // Serialize again and check that nothing was lost.
        let back = table.to_sst();
        assert_eq!(back.items.len(), 3);
        for (idx, expected) in EXPECTED.iter().enumerate() {
            assert_eq!(back.items[idx].t.as_ref().unwrap().value, *expected);
        }
        assert_eq!(back.count, Some(3));
        assert_eq!(back.unique_count, Some(3));
    }

    #[test]
    fn test_sst_from_xml_rich_text() {
        let table = SharedStringTable::from_sst(bold_normal_sst());
        assert_eq!(table.len(), 1);
        assert_eq!(table.get(0), Some("Bold Normal"));
    }

    #[test]
    fn test_sst_default() {
        let table = SharedStringTable::default();
        assert!(table.is_empty());
    }

    #[test]
    fn test_add_rich_text() {
        let mut table = SharedStringTable::new();
        let runs = vec![text_run("Hello ", true), text_run("World", false)];
        let idx = table.add_rich_text(&runs);
        assert_eq!(idx, 0);
        assert_eq!(table.get(0), Some("Hello World"));
        assert!(table.get_rich_text(0).is_some());
    }

    #[test]
    fn test_get_rich_text_none_for_plain() {
        let mut table = SharedStringTable::new();
        table.add("plain");
        assert!(table.get_rich_text(0).is_none());
    }

    #[test]
    fn test_rich_text_roundtrip_through_sst() {
        let table = SharedStringTable::from_sst(bold_normal_sst());
        let back = table.to_sst();
        assert!(back.items[0].t.is_none());
        assert_eq!(back.items[0].r.len(), 2);
    }

    #[test]
    fn test_space_preserve_roundtrip() {
        let xml_sst = sst_of(vec![plain_si(" leading space", Some("preserve"))]);
        let table = SharedStringTable::from_sst(xml_sst);
        let back = table.to_sst();
        assert_eq!(
            back.items[0].t.as_ref().unwrap().xml_space,
            Some("preserve".to_string())
        );
    }

    #[test]
    fn test_add_owned_then_to_sst() {
        let mut table = SharedStringTable::new();
        table.add_owned("test".to_string());
        let sst = table.to_sst();
        assert_eq!(sst.items.len(), 1);
        assert_eq!(sst.items[0].t.as_ref().unwrap().value, "test");
    }
}