Skip to main content

sheetkit_core/
sst.rs

1//! Runtime shared string table.
2//!
3//! The [`SharedStringTable`] provides an efficient in-memory index for looking
4//! up and inserting shared strings. It bridges the gap between the XML-level
5//! [`sheetkit_xml::shared_strings::Sst`] and the high-level cell API.
6
7use std::collections::HashMap;
8
9use sheetkit_xml::shared_strings::{Si, Sst, T};
10
11use crate::rich_text::{xml_to_run, RichTextRun};
12
13/// Runtime shared string table for efficient string lookup and insertion.
14///
15/// Maintains both an ordered list of strings (for index-based lookup) and a
16/// reverse hash map (for deduplication when inserting). Also preserves rich
17/// text formatting information for round-tripping.
18#[derive(Debug)]
19pub struct SharedStringTable {
20    strings: Vec<String>,
21    index_map: HashMap<String, usize>,
22    rich_items: HashMap<usize, Si>,
23}
24
25impl SharedStringTable {
26    /// Create a new, empty shared string table.
27    pub fn new() -> Self {
28        Self {
29            strings: Vec::new(),
30            index_map: HashMap::new(),
31            rich_items: HashMap::new(),
32        }
33    }
34
35    /// Build from an XML [`Sst`] struct.
36    ///
37    /// Plain-text items use the `t` field directly. Rich-text items
38    /// concatenate all run texts.
39    pub fn from_sst(sst: &Sst) -> Self {
40        let mut table = Self::new();
41
42        for si in &sst.items {
43            let text = si_to_string(si);
44            let idx = table.strings.len();
45            table.index_map.entry(text.clone()).or_insert(idx);
46            if si.t.is_none() && !si.r.is_empty() {
47                table.rich_items.insert(idx, si.clone());
48            }
49            table.strings.push(text);
50        }
51
52        table
53    }
54
55    /// Convert back to an XML [`Sst`] struct.
56    pub fn to_sst(&self) -> Sst {
57        let items: Vec<Si> = self
58            .strings
59            .iter()
60            .enumerate()
61            .map(|(idx, s)| {
62                if let Some(rich_si) = self.rich_items.get(&idx) {
63                    rich_si.clone()
64                } else {
65                    Si {
66                        t: Some(T {
67                            xml_space: if s.starts_with(' ')
68                                || s.ends_with(' ')
69                                || s.contains("  ")
70                                || s.contains('\n')
71                                || s.contains('\t')
72                            {
73                                Some("preserve".to_string())
74                            } else {
75                                None
76                            },
77                            value: s.clone(),
78                        }),
79                        r: vec![],
80                    }
81                }
82            })
83            .collect();
84
85        let len = items.len() as u32;
86        Sst {
87            xmlns: sheetkit_xml::namespaces::SPREADSHEET_ML.to_string(),
88            count: Some(len),
89            unique_count: Some(len),
90            items,
91        }
92    }
93
94    /// Get a string by its index.
95    pub fn get(&self, index: usize) -> Option<&str> {
96        self.strings.get(index).map(|s| s.as_str())
97    }
98
99    /// Add a string, returning its index.
100    ///
101    /// If the string already exists, the existing index is returned (dedup).
102    pub fn add(&mut self, s: &str) -> usize {
103        if let Some(&idx) = self.index_map.get(s) {
104            return idx;
105        }
106        let idx = self.strings.len();
107        self.strings.push(s.to_string());
108        self.index_map.insert(s.to_string(), idx);
109        idx
110    }
111
112    /// Add rich text runs, returning the SST index.
113    ///
114    /// The plain-text concatenation of the runs is used for deduplication.
115    pub fn add_rich_text(&mut self, runs: &[RichTextRun]) -> usize {
116        let plain: String = runs.iter().map(|r| r.text.as_str()).collect();
117        if let Some(&idx) = self.index_map.get(&plain) {
118            return idx;
119        }
120        let idx = self.strings.len();
121        self.strings.push(plain.clone());
122        self.index_map.insert(plain, idx);
123        let si = crate::rich_text::runs_to_si(runs);
124        self.rich_items.insert(idx, si);
125        idx
126    }
127
128    /// Get rich text runs for an SST entry, if it has formatting.
129    ///
130    /// Returns `None` for plain-text entries.
131    pub fn get_rich_text(&self, index: usize) -> Option<Vec<RichTextRun>> {
132        self.rich_items
133            .get(&index)
134            .map(|si| si.r.iter().map(xml_to_run).collect())
135    }
136
137    /// Number of unique strings.
138    pub fn len(&self) -> usize {
139        self.strings.len()
140    }
141
142    /// Returns `true` if the table contains no strings.
143    pub fn is_empty(&self) -> bool {
144        self.strings.is_empty()
145    }
146}
147
148impl Default for SharedStringTable {
149    fn default() -> Self {
150        Self::new()
151    }
152}
153
154/// Extract the plain-text content of a shared string item.
155///
156/// For plain items, returns `si.t.value`. For rich-text items, concatenates
157/// all run texts.
158fn si_to_string(si: &Si) -> String {
159    if let Some(ref t) = si.t {
160        t.value.clone()
161    } else {
162        // Rich text: concatenate all runs.
163        si.r.iter().map(|r| r.t.value.as_str()).collect()
164    }
165}
166
#[cfg(test)]
mod tests {
    use super::*;
    use sheetkit_xml::shared_strings::{Si, Sst, R, T};

    /// Plain item holding `text`, without an `xml:space` attribute.
    fn plain_item(text: &str) -> Si {
        Si {
            t: Some(T {
                xml_space: None,
                value: text.to_string(),
            }),
            r: vec![],
        }
    }

    /// Run with no run properties holding `text`.
    fn bare_run(text: &str) -> R {
        R {
            r_pr: None,
            t: T {
                xml_space: None,
                value: text.to_string(),
            },
        }
    }

    /// Wrap `items` in an `Sst` whose counts both equal the item count.
    fn sst_of(items: Vec<Si>) -> Sst {
        let total = items.len() as u32;
        Sst {
            xmlns: sheetkit_xml::namespaces::SPREADSHEET_ML.to_string(),
            count: Some(total),
            unique_count: Some(total),
            items,
        }
    }

    /// Single rich item made of the runs "Bold" and " Normal".
    fn bold_normal_sst() -> Sst {
        sst_of(vec![Si {
            t: None,
            r: vec![bare_run("Bold"), bare_run(" Normal")],
        }])
    }

    /// Rich text run with only the text and the bold flag set.
    fn text_run(text: &str, bold: bool) -> RichTextRun {
        RichTextRun {
            text: text.to_string(),
            font: None,
            size: None,
            bold,
            italic: false,
            color: None,
        }
    }

    #[test]
    fn test_sst_new_is_empty() {
        let table = SharedStringTable::new();
        assert_eq!(table.len(), 0);
        assert!(table.is_empty());
    }

    #[test]
    fn test_sst_add_returns_index() {
        let mut table = SharedStringTable::new();
        for (expected, text) in ["hello", "world", "foo"].iter().enumerate() {
            assert_eq!(table.add(text), expected);
        }
        assert_eq!(table.len(), 3);
    }

    #[test]
    fn test_sst_add_deduplicates() {
        let mut table = SharedStringTable::new();
        let first = table.add("hello");
        let second = table.add("world");
        let repeated = table.add("hello");

        assert_eq!(first, 0);
        assert_eq!(second, 1);
        // A repeated string maps back to the entry created the first time.
        assert_eq!(repeated, 0);
        // Only two entries were created.
        assert_eq!(table.len(), 2);
    }

    #[test]
    fn test_sst_get() {
        let mut table = SharedStringTable::new();
        table.add("alpha");
        table.add("beta");

        assert_eq!(table.get(0), Some("alpha"));
        assert_eq!(table.get(1), Some("beta"));
        assert_eq!(table.get(2), None);
    }

    #[test]
    fn test_sst_from_xml_and_back() {
        let expected = ["Name", "Age", "City"];
        let xml_sst = sst_of(expected.iter().map(|text| plain_item(text)).collect());

        let table = SharedStringTable::from_sst(&xml_sst);
        assert_eq!(table.len(), 3);
        for (idx, text) in expected.iter().enumerate() {
            assert_eq!(table.get(idx), Some(*text));
        }

        // Convert back.
        let back = table.to_sst();
        assert_eq!(back.items.len(), 3);
        for (idx, text) in expected.iter().enumerate() {
            assert_eq!(back.items[idx].t.as_ref().unwrap().value, *text);
        }
        assert_eq!(back.count, Some(3));
        assert_eq!(back.unique_count, Some(3));
    }

    #[test]
    fn test_sst_from_xml_rich_text() {
        let table = SharedStringTable::from_sst(&bold_normal_sst());
        assert_eq!(table.len(), 1);
        assert_eq!(table.get(0), Some("Bold Normal"));
    }

    #[test]
    fn test_sst_default() {
        assert!(SharedStringTable::default().is_empty());
    }

    #[test]
    fn test_add_rich_text() {
        let mut table = SharedStringTable::new();
        let runs = vec![text_run("Hello ", true), text_run("World", false)];

        let idx = table.add_rich_text(&runs);
        assert_eq!(idx, 0);
        assert_eq!(table.get(0), Some("Hello World"));
        assert!(table.get_rich_text(0).is_some());
    }

    #[test]
    fn test_get_rich_text_none_for_plain() {
        let mut table = SharedStringTable::new();
        table.add("plain");
        assert!(table.get_rich_text(0).is_none());
    }

    #[test]
    fn test_rich_text_roundtrip_through_sst() {
        let back = SharedStringTable::from_sst(&bold_normal_sst()).to_sst();
        let item = &back.items[0];
        assert!(item.t.is_none());
        assert_eq!(item.r.len(), 2);
    }
}