use crate::decode::decode_text_lossy;
use crate::error::{Error, Result};
/// Table of strings looked up by zero-based index.
///
/// Built from XML by [`SharedStrings::parse`]; `Default` yields an empty table.
#[derive(Debug, Clone, Default)]
pub struct SharedStrings {
/// Parsed string items, stored in the order they appear in the document.
strings: Vec<String>,
}
impl SharedStrings {
    /// Parses shared-strings XML into an index-addressable table.
    ///
    /// Every `<si>` item contributes exactly one entry, in document order, so
    /// indices passed to [`SharedStrings::get`] line up with item positions in
    /// the XML:
    ///
    /// * the entry is the concatenation of the text of every `<t>` element in
    ///   the item, which covers both plain items and rich-text runs;
    /// * `<t>` elements nested in a phonetic run (`<rPh>`) are skipped, since
    ///   they carry a pronunciation hint rather than the string value;
    /// * a self-closing `<si/>` yields an empty string instead of being
    ///   dropped, which would shift every following index.
    ///
    /// # Errors
    ///
    /// Returns [`Error::XmlParse`] with the reader's message when the
    /// underlying XML reader reports an error.
    pub fn parse(xml: &str) -> Result<Self> {
        use quick_xml::events::Event;

        let mut strings = Vec::new();
        let mut reader = quick_xml::Reader::from_str(xml);
        // Whitespace inside <t> is part of the string, so it must not be trimmed.
        reader.config_mut().trim_text(false);
        let mut buf = Vec::new();
        let mut in_si = false;
        let mut in_t = false;
        let mut in_phonetic = false;
        let mut current_text = String::new();
        loop {
            match reader.read_event_into(&mut buf) {
                Ok(Event::Start(e)) => match e.name().as_ref() {
                    b"si" => {
                        in_si = true;
                        in_phonetic = false;
                        current_text.clear();
                    }
                    b"rPh" if in_si => {
                        in_phonetic = true;
                    }
                    // Only <t> outside a phonetic run contributes to the value.
                    b"t" if in_si && !in_phonetic => {
                        in_t = true;
                    }
                    _ => {}
                },
                // `<si/>` produces no Start/End pair; it still occupies an index.
                Ok(Event::Empty(e)) if e.name().as_ref() == b"si" => {
                    strings.push(String::new());
                }
                Ok(Event::Text(e)) if in_t => {
                    let text = decode_text_lossy(&e);
                    current_text.push_str(&text);
                }
                Ok(Event::End(e)) => match e.name().as_ref() {
                    b"si" => {
                        // Move the accumulated text out instead of cloning it.
                        strings.push(std::mem::take(&mut current_text));
                        in_si = false;
                    }
                    b"rPh" => {
                        in_phonetic = false;
                    }
                    b"t" => {
                        in_t = false;
                    }
                    _ => {}
                },
                Ok(Event::Eof) => break,
                Err(e) => return Err(Error::XmlParse(e.to_string())),
                _ => {}
            }
            buf.clear();
        }
        Ok(Self { strings })
    }

    /// Returns the string stored at `index`, or `None` when `index` is out of
    /// range.
    pub fn get(&self, index: usize) -> Option<&str> {
        self.strings.get(index).map(String::as_str)
    }

    /// Returns the number of strings in the table.
    #[allow(dead_code)]
    pub fn len(&self) -> usize {
        self.strings.len()
    }

    /// Returns `true` when the table holds no strings.
    #[allow(dead_code)]
    pub fn is_empty(&self) -> bool {
        self.strings.is_empty()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain items are returned in document order; out-of-range lookups are `None`.
    #[test]
    fn test_parse_shared_strings() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<sst xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" count="5" uniqueCount="3">
<si><t>Hello</t></si>
<si><t>World</t></si>
<si><t>Test</t></si>
</sst>"#;
        let ss = SharedStrings::parse(xml).unwrap();
        assert_eq!(ss.len(), 3);
        assert_eq!(ss.get(0), Some("Hello"));
        assert_eq!(ss.get(1), Some("World"));
        assert_eq!(ss.get(2), Some("Test"));
        assert_eq!(ss.get(3), None);
    }

    /// A table element without any items parses to an empty table.
    #[test]
    fn test_empty_table_has_no_entries() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<sst xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" count="0" uniqueCount="0"/>"#;
        let ss = SharedStrings::parse(xml).unwrap();
        assert!(ss.is_empty());
        assert_eq!(ss.len(), 0);
        assert_eq!(ss.get(0), None);
    }

    /// The runs of a rich-text item are concatenated into a single entry.
    #[test]
    fn test_rich_text() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<sst xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
<si>
<r><t>Hello</t></r>
<r><t>World</t></r>
</si>
</sst>"#;
        let ss = SharedStrings::parse(xml).unwrap();
        assert_eq!(ss.len(), 1);
        assert_eq!(ss.get(0), Some("HelloWorld"));
    }

    /// An unknown entity reference is kept verbatim rather than failing the parse.
    #[test]
    fn test_malformed_shared_string_preserves_raw_entity_text() {
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<sst xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
<si><t>Hello &bogus; World</t></si>
</sst>"#;
        let ss = SharedStrings::parse(xml).unwrap();
        assert_eq!(ss.get(0), Some("Hello &bogus; World"));
    }

    /// A legitimate entity (`&amp;`) and an unknown one (`&bogus;`) in the same
    /// text node: the former is expected to decode, the latter to stay verbatim.
    #[test]
    fn test_shared_strings_mixed_entities_preserve_legitimate_and_malformed() {
        // The fixture must contain a real `&amp;` reference; a bare `&` is not
        // well-formed XML and would leave the "legitimate" half untested.
        let xml = r#"<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<sst xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" count="1" uniqueCount="1">
<si><t>A &amp; B &bogus; C</t></si>
</sst>"#;
        let table = SharedStrings::parse(xml).expect("parse succeeds");
        let s = table.get(0).expect("index 0 exists");
        assert_eq!(s, "A & B &bogus; C");
    }
}