use std::collections::HashMap;
use crate::sheet::Result;
/// Initial capacity reserved for the string table while parsing.
const INITIAL_STRINGS_CAPACITY: usize = 10000;
/// Table of shared strings, addressed by their zero-based position.
#[derive(Debug, Default)]
pub struct SharedStrings {
    /// Entries in the order they were parsed; the position is the lookup index.
    strings: Vec<String>,
    // Never read in this file, so the dead-code lint is silenced explicitly.
    #[allow(dead_code)]
    string_to_index: HashMap<String, usize>,
}
impl SharedStrings {
    /// Creates an empty shared-string table.
    pub fn new() -> Self {
        Self::default()
    }

    /// Parses the XML of a shared-string table into a [`SharedStrings`].
    ///
    /// Every `<si>` element produces exactly one entry, in document order, so
    /// that entry positions line up with the indices used to reference them.
    /// An item without any text (including a self-closing `<si/>`) becomes an
    /// empty string rather than being dropped.
    ///
    /// The text of an item is the concatenation of all of its `<t>` elements
    /// (plain text and rich-text runs alike), excluding phonetic runs
    /// (`<rPh>`), with XML character and entity references decoded.
    ///
    /// # Errors
    ///
    /// This implementation never returns `Err`; the `Result` is kept for
    /// interface compatibility. An `<si>` element without a closing `</si>`
    /// ends parsing and everything collected up to that point is returned.
    pub fn parse(content: &str) -> Result<Self> {
        let bytes = content.as_bytes();
        let mut strings = Vec::with_capacity(INITIAL_STRINGS_CAPACITY);
        let mut pos = 0;

        while let Some((_, body_start, empty)) = Self::find_open_tag(bytes, pos, b"<si") {
            if empty {
                // `<si/>` still occupies an index slot.
                strings.push(String::new());
                pos = body_start;
                continue;
            }
            let body_len = match memchr::memmem::find(&bytes[body_start..], b"</si>") {
                Some(len) => len,
                None => break,
            };
            let body_end = body_start + body_len;
            // Push even when no text was found: skipping the entry would shift
            // the index of every following string.
            strings.push(
                Self::extract_text_from_si(&content[body_start..body_end]).unwrap_or_default(),
            );
            pos = body_end + b"</si>".len();
        }

        Ok(SharedStrings {
            strings,
            // Nothing is ever inserted into the map, so do not reserve space for it.
            string_to_index: HashMap::new(),
        })
    }

    /// Returns the string stored at `index`, or `None` if `index` is out of range.
    pub fn get(&self, index: usize) -> Option<&str> {
        self.strings.get(index).map(|s| s.as_str())
    }

    /// Returns the number of strings in the table.
    pub fn len(&self) -> usize {
        self.strings.len()
    }

    /// Returns `true` if the table holds no strings.
    pub fn is_empty(&self) -> bool {
        self.strings.is_empty()
    }

    /// Returns all strings as a slice, in index order.
    pub fn strings(&self) -> &[String] {
        &self.strings
    }

    /// Extracts the display text of one string item.
    ///
    /// `si_content` is the markup of a single `<si>` element (with or without
    /// the enclosing tags). Returns the decoded, concatenated content of every
    /// `<t>` element outside of `<rPh>` phonetic runs, or `None` when the item
    /// contains no `<t>` element at all.
    fn extract_text_from_si(si_content: &str) -> Option<String> {
        let bytes = si_content.as_bytes();
        let mut text: Option<String> = None;
        let mut pos = 0;

        while pos < bytes.len() {
            // Text is collected only up to the next phonetic run, which is
            // then skipped as a whole.
            let phonetic = Self::find_open_tag(bytes, pos, b"<rPh");
            let segment_end = phonetic.map_or(bytes.len(), |(start, _, _)| start);
            Self::collect_text_runs(si_content, pos, segment_end, &mut text);

            pos = match phonetic {
                None => break,
                Some((_, after_open, true)) => after_open,
                Some((_, after_open, false)) => {
                    match memchr::memmem::find(&bytes[after_open..], b"</rPh>") {
                        Some(rel) => after_open + rel + b"</rPh>".len(),
                        None => break,
                    }
                }
            };
        }
        text
    }

    /// Appends the decoded content of every `<t>` element that lies within
    /// `si_content[from..to]` to `out`, creating the string on first use.
    fn collect_text_runs(si_content: &str, from: usize, to: usize, out: &mut Option<String>) {
        let window = &si_content.as_bytes()[..to];
        let mut pos = from;

        while let Some((_, body_start, empty)) = Self::find_open_tag(window, pos, b"<t") {
            if empty {
                // `<t/>` is present but contributes no characters.
                out.get_or_insert_with(String::new);
                pos = body_start;
                continue;
            }
            match memchr::memmem::find(&window[body_start..], b"</t>") {
                Some(rel) => {
                    let raw = &si_content[body_start..body_start + rel];
                    Self::push_unescaped(out.get_or_insert_with(String::new), raw);
                    pos = body_start + rel + b"</t>".len();
                }
                None => break,
            }
        }
    }

    /// Finds the next opening tag whose text starts with `open` (for example
    /// `b"<si"`) at or after byte offset `from`.
    ///
    /// The byte following the name must be `>`, `/` or whitespace, so longer
    /// element names sharing the same prefix are not matched. Returns
    /// `(tag_start, offset_just_past_the_tag, is_self_closing)`.
    fn find_open_tag(bytes: &[u8], from: usize, open: &[u8]) -> Option<(usize, usize, bool)> {
        let mut cursor = from;
        loop {
            let start = cursor + memchr::memmem::find(bytes.get(cursor..)?, open)?;
            let after_name = start + open.len();
            match bytes.get(after_name).copied()? {
                b'>' => return Some((start, after_name + 1, false)),
                b'/' | b' ' | b'\t' | b'\r' | b'\n' => {
                    let close =
                        after_name + bytes[after_name..].iter().position(|&b| b == b'>')?;
                    // `close > after_name` here, so `close - 1` is in bounds.
                    return Some((start, close + 1, bytes[close - 1] == b'/'));
                }
                // A different element whose name merely begins the same way.
                _ => cursor = after_name,
            }
        }
    }

    /// Appends `raw` to `out`, decoding the predefined XML entities and numeric
    /// character references. Anything that is not a recognised reference is
    /// copied through unchanged.
    fn push_unescaped(out: &mut String, raw: &str) {
        let mut rest = raw;
        while let Some(amp) = rest.find('&') {
            out.push_str(&rest[..amp]);
            let tail = &rest[amp..];
            let decoded = tail.find(';').and_then(|semi| {
                Self::decode_entity(&tail[1..semi]).map(|ch| (ch, semi + 1))
            });
            match decoded {
                Some((ch, consumed)) => {
                    out.push(ch);
                    rest = &tail[consumed..];
                }
                None => {
                    out.push('&');
                    rest = &tail[1..];
                }
            }
        }
        out.push_str(rest);
    }

    /// Resolves the name between `&` and `;` of an XML reference to a character.
    fn decode_entity(name: &str) -> Option<char> {
        match name {
            "amp" => Some('&'),
            "lt" => Some('<'),
            "gt" => Some('>'),
            "quot" => Some('"'),
            "apos" => Some('\''),
            _ => {
                let digits = name.strip_prefix('#')?;
                let code = match digits.strip_prefix('x') {
                    Some(hex) => u32::from_str_radix(hex, 16).ok()?,
                    None => digits.parse::<u32>().ok()?,
                };
                std::char::from_u32(code)
            }
        }
    }
}