Skip to main content

sheetkit_core/
sst.rs

1//! Runtime shared string table.
2//!
3//! The [`SharedStringTable`] provides an efficient in-memory index for looking
4//! up and inserting shared strings. It bridges the gap between the XML-level
5//! [`sheetkit_xml::shared_strings::Sst`] and the high-level cell API.
6
7use std::collections::HashMap;
8use std::sync::Arc;
9
10use sheetkit_xml::shared_strings::{Si, Sst, T};
11
12use crate::rich_text::{xml_to_run, RichTextRun};
13
14/// Runtime shared string table for efficient string lookup and insertion.
15///
16/// Maintains both an ordered list of strings (for index-based lookup) and a
17/// reverse hash map (for deduplication when inserting). Uses `Arc<str>` so that
18/// both collections share the same string allocation. Original [`Si`] items
19/// loaded from file are preserved so that `to_sst()` can reuse them without
20/// cloning the string data a second time.
21#[derive(Debug)]
22pub struct SharedStringTable {
23    strings: Vec<Arc<str>>,
24    index_map: HashMap<Arc<str>, usize>,
25    /// Original or constructed Si items, parallel to `strings`.
26    /// `None` for plain-text items added via `add()` / `add_owned()`.
27    si_items: Vec<Option<Si>>,
28}
29
30impl SharedStringTable {
31    /// Create a new, empty shared string table.
32    pub fn new() -> Self {
33        Self {
34            strings: Vec::new(),
35            index_map: HashMap::new(),
36            si_items: Vec::new(),
37        }
38    }
39
40    /// Build from an XML [`Sst`], taking ownership to avoid cloning items.
41    ///
42    /// Plain-text items use the `t` field directly. Rich-text items
43    /// concatenate all run texts. Pre-sizes internal containers.
44    pub fn from_sst(sst: Sst) -> Self {
45        let cap = sst.items.len();
46        let mut strings = Vec::with_capacity(cap);
47        let mut index_map = HashMap::with_capacity(cap);
48        let mut si_items: Vec<Option<Si>> = Vec::with_capacity(cap);
49
50        for si in sst.items {
51            let text: Arc<str> = si_to_string(&si).into();
52            let idx = strings.len();
53            index_map.entry(Arc::clone(&text)).or_insert(idx);
54            let is_rich = si.t.is_none() && !si.r.is_empty();
55            let has_space_attr = si.t.as_ref().is_some_and(|t| t.xml_space.is_some());
56            if is_rich || has_space_attr {
57                si_items.push(Some(si));
58            } else {
59                si_items.push(None);
60            }
61            strings.push(text);
62        }
63
64        Self {
65            strings,
66            index_map,
67            si_items,
68        }
69    }
70
71    /// Convert back to an XML [`Sst`] struct for serialization.
72    ///
73    /// Reuses stored [`Si`] items for entries loaded from file. Builds new
74    /// `Si` items only for strings added at runtime.
75    pub fn to_sst(&self) -> Sst {
76        let items: Vec<Si> = self
77            .strings
78            .iter()
79            .enumerate()
80            .map(|(idx, s)| {
81                if let Some(ref si) = self.si_items[idx] {
82                    si.clone()
83                } else {
84                    Si {
85                        t: Some(T {
86                            xml_space: if needs_space_preserve(s) {
87                                Some("preserve".to_string())
88                            } else {
89                                None
90                            },
91                            value: s.to_string(),
92                        }),
93                        r: vec![],
94                    }
95                }
96            })
97            .collect();
98
99        let len = items.len() as u32;
100        Sst {
101            xmlns: sheetkit_xml::namespaces::SPREADSHEET_ML.to_string(),
102            count: Some(len),
103            unique_count: Some(len),
104            items,
105        }
106    }
107
108    /// Get a string by its index.
109    pub fn get(&self, index: usize) -> Option<&str> {
110        self.strings.get(index).map(|s| &**s)
111    }
112
113    /// Add a string by reference, returning its index.
114    ///
115    /// If the string already exists, the existing index is returned (dedup).
116    pub fn add(&mut self, s: &str) -> usize {
117        if let Some(&idx) = self.index_map.get(s) {
118            return idx;
119        }
120        let idx = self.strings.len();
121        let rc: Arc<str> = s.into();
122        self.strings.push(Arc::clone(&rc));
123        self.index_map.insert(rc, idx);
124        self.si_items.push(None);
125        idx
126    }
127
128    /// Add a string by value, returning its index.
129    ///
130    /// Avoids one allocation compared to `add()` when the caller already
131    /// owns a `String`.
132    pub fn add_owned(&mut self, s: String) -> usize {
133        if let Some(&idx) = self.index_map.get(s.as_str()) {
134            return idx;
135        }
136        let idx = self.strings.len();
137        let rc: Arc<str> = s.into();
138        self.index_map.insert(Arc::clone(&rc), idx);
139        self.strings.push(rc);
140        self.si_items.push(None);
141        idx
142    }
143
144    /// Add rich text runs, returning the SST index.
145    ///
146    /// The plain-text concatenation of the runs is used for deduplication.
147    pub fn add_rich_text(&mut self, runs: &[RichTextRun]) -> usize {
148        let plain: String = runs.iter().map(|r| r.text.as_str()).collect();
149        if let Some(&idx) = self.index_map.get(plain.as_str()) {
150            return idx;
151        }
152        let idx = self.strings.len();
153        let rc: Arc<str> = plain.into();
154        self.index_map.insert(Arc::clone(&rc), idx);
155        self.strings.push(rc);
156        let si = crate::rich_text::runs_to_si(runs);
157        self.si_items.push(Some(si));
158        idx
159    }
160
161    /// Get rich text runs for an SST entry, if it has formatting.
162    ///
163    /// Returns `None` for plain-text entries.
164    pub fn get_rich_text(&self, index: usize) -> Option<Vec<RichTextRun>> {
165        self.si_items
166            .get(index)
167            .and_then(|opt| opt.as_ref())
168            .filter(|si| !si.r.is_empty())
169            .map(|si| si.r.iter().map(xml_to_run).collect())
170    }
171
172    /// Number of unique strings.
173    pub fn len(&self) -> usize {
174        self.strings.len()
175    }
176
177    /// Returns `true` if the table contains no strings.
178    pub fn is_empty(&self) -> bool {
179        self.strings.is_empty()
180    }
181}
182
183impl Default for SharedStringTable {
184    fn default() -> Self {
185        Self::new()
186    }
187}
188
/// Check whether a string needs `xml:space="preserve"`.
///
/// Returns `true` when the text starts or ends with a space, contains two
/// consecutive spaces, or contains a line feed, tab or carriage return
/// anywhere. Returns `false` otherwise, including for the empty string.
fn needs_space_preserve(s: &str) -> bool {
    // XML white space is space, tab, LF and CR; CR must be covered too.
    let has_control_whitespace = s.contains(|c: char| matches!(c, '\n' | '\t' | '\r'));
    has_control_whitespace || s.starts_with(' ') || s.ends_with(' ') || s.contains("  ")
}
197
198/// Extract the plain-text content of a shared string item.
199///
200/// For plain items, returns `si.t.value`. For rich-text items, concatenates
201/// all run texts.
202fn si_to_string(si: &Si) -> String {
203    if let Some(ref t) = si.t {
204        t.value.clone()
205    } else {
206        // Rich text: concatenate all runs.
207        si.r.iter().map(|r| r.t.value.as_str()).collect()
208    }
209}
210
#[cfg(test)]
mod tests {
    use super::*;
    use sheetkit_xml::shared_strings::{Si, Sst, R, T};

    /// Build a plain `<si><t>..</t></si>` item with an optional `xml:space`.
    fn plain_si(value: &str, xml_space: Option<&str>) -> Si {
        Si {
            t: Some(T {
                xml_space: xml_space.map(String::from),
                value: value.to_string(),
            }),
            r: vec![],
        }
    }

    /// Build a rich-text item with one unformatted run per text.
    fn rich_si(texts: &[&str]) -> Si {
        Si {
            t: None,
            r: texts
                .iter()
                .map(|text| R {
                    r_pr: None,
                    t: T {
                        xml_space: None,
                        value: (*text).to_string(),
                    },
                })
                .collect(),
        }
    }

    /// Wrap items in an `Sst` whose counts match the number of items.
    fn sst_of(items: Vec<Si>) -> Sst {
        let total = items.len() as u32;
        Sst {
            xmlns: sheetkit_xml::namespaces::SPREADSHEET_ML.to_string(),
            count: Some(total),
            unique_count: Some(total),
            items,
        }
    }

    /// Build a run with only the text and bold flag set.
    fn run(text: &str, bold: bool) -> RichTextRun {
        RichTextRun {
            text: text.to_string(),
            font: None,
            size: None,
            bold,
            italic: false,
            color: None,
        }
    }

    #[test]
    fn test_sst_new_is_empty() {
        let table = SharedStringTable::new();
        assert_eq!(table.len(), 0);
        assert!(table.is_empty());
    }

    #[test]
    fn test_sst_add_returns_index() {
        let mut table = SharedStringTable::new();
        for (expected, word) in ["hello", "world", "foo"].into_iter().enumerate() {
            assert_eq!(table.add(word), expected);
        }
        assert_eq!(table.len(), 3);
    }

    #[test]
    fn test_sst_add_deduplicates() {
        let mut table = SharedStringTable::new();
        let first = table.add("hello");
        let second = table.add("world");
        let repeated = table.add("hello");

        assert_eq!(first, 0);
        assert_eq!(second, 1);
        // A repeated string maps back to its original index.
        assert_eq!(repeated, 0);
        // Only the two distinct strings are stored.
        assert_eq!(table.len(), 2);
    }

    #[test]
    fn test_sst_add_owned() {
        let mut table = SharedStringTable::new();
        assert_eq!(table.add_owned(String::from("hello")), 0);
        assert_eq!(table.add_owned(String::from("world")), 1);
        // Owned insertion deduplicates as well.
        assert_eq!(table.add_owned(String::from("hello")), 0);

        assert_eq!(table.len(), 2);
        assert_eq!(table.get(0), Some("hello"));
        assert_eq!(table.get(1), Some("world"));
    }

    #[test]
    fn test_sst_get() {
        let mut table = SharedStringTable::new();
        table.add("alpha");
        table.add("beta");

        assert_eq!(table.get(0), Some("alpha"));
        assert_eq!(table.get(1), Some("beta"));
        // One past the end is out of range.
        assert_eq!(table.get(2), None);
    }

    #[test]
    fn test_sst_from_xml_and_back() {
        let expected = ["Name", "Age", "City"];
        let xml_sst = sst_of(expected.iter().map(|v| plain_si(v, None)).collect());

        let table = SharedStringTable::from_sst(xml_sst);
        assert_eq!(table.len(), 3);
        for (idx, value) in expected.iter().enumerate() {
            assert_eq!(table.get(idx), Some(*value));
        }

        // Serializing again yields the same plain items and counts.
        let back = table.to_sst();
        assert_eq!(back.items.len(), 3);
        for (item, value) in back.items.iter().zip(expected) {
            assert_eq!(item.t.as_ref().unwrap().value, value);
        }
        assert_eq!(back.count, Some(3));
        assert_eq!(back.unique_count, Some(3));
    }

    #[test]
    fn test_sst_from_xml_rich_text() {
        let xml_sst = sst_of(vec![rich_si(&["Bold", " Normal"])]);

        let table = SharedStringTable::from_sst(xml_sst);
        assert_eq!(table.len(), 1);
        assert_eq!(table.get(0), Some("Bold Normal"));
    }

    #[test]
    fn test_sst_default() {
        assert!(SharedStringTable::default().is_empty());
    }

    #[test]
    fn test_add_rich_text() {
        let mut table = SharedStringTable::new();
        let runs = vec![run("Hello ", true), run("World", false)];

        let idx = table.add_rich_text(&runs);

        assert_eq!(idx, 0);
        assert_eq!(table.get(0), Some("Hello World"));
        assert!(table.get_rich_text(0).is_some());
    }

    #[test]
    fn test_get_rich_text_none_for_plain() {
        let mut table = SharedStringTable::new();
        table.add("plain");
        assert!(table.get_rich_text(0).is_none());
    }

    #[test]
    fn test_rich_text_roundtrip_through_sst() {
        let xml_sst = sst_of(vec![rich_si(&["Bold", " Normal"])]);

        let back = SharedStringTable::from_sst(xml_sst).to_sst();

        let item = &back.items[0];
        assert!(item.t.is_none());
        assert_eq!(item.r.len(), 2);
    }

    #[test]
    fn test_space_preserve_roundtrip() {
        let xml_sst = sst_of(vec![plain_si(" leading space", Some("preserve"))]);

        let back = SharedStringTable::from_sst(xml_sst).to_sst();

        assert_eq!(
            back.items[0].t.as_ref().unwrap().xml_space,
            Some("preserve".to_string())
        );
    }

    #[test]
    fn test_add_owned_then_to_sst() {
        let mut table = SharedStringTable::new();
        table.add_owned("test".to_string());

        let sst = table.to_sst();

        assert_eq!(sst.items.len(), 1);
        assert_eq!(sst.items[0].t.as_ref().unwrap().value, "test");
    }
}
444}