use xlsbye_core::error::{Result, XlsByeError};
use xlsbye_core::types::{RichTextRun, SharedStringEntry, SharedStringTable};
use crate::record::cursor::RecordCursor;
use crate::record::header::RecordIter;
use crate::record::ids::{BRT_BEGIN_SST, BRT_END_SST, BRT_SST_ITEM};
/// Parses a BIFF12 shared-strings record stream into a string table.
///
/// Records are consumed in order until a `BrtEndSst` record is seen or the
/// stream runs out. `BrtBeginSst` supplies two `u32` values: the total
/// reference count, which is returned untouched, and the declared number of
/// unique strings, which is checked against the number of `BrtSstItem`
/// records actually parsed. Records of any other type are skipped.
///
/// # Returns
///
/// The populated table together with the total reference count read from
/// `BrtBeginSst`.
///
/// # Errors
///
/// Returns [`XlsByeError::Biff12`] when:
/// * `BrtBeginSst` carries bytes beyond its two `u32` fields,
/// * a `BrtSstItem` appears before any `BrtBeginSst`,
/// * no `BrtBeginSst` was seen at all,
/// * the number of parsed items does not fit in a `u32` or differs from the
///   declared unique count.
///
/// Errors from the record iterator, the cursor reads and `parse_sst_item`
/// are propagated unchanged.
pub fn parse_shared_strings(data: &[u8]) -> Result<(SharedStringTable, u32)> {
    let mut table = SharedStringTable::new();
    // `(total reference count, declared unique count)` from the most recent
    // BrtBeginSst; `None` until such a record has been fully validated.
    let mut header: Option<(u32, u32)> = None;

    for entry in RecordIter::new(data) {
        let (kind, body) = entry?;
        match kind {
            k if k == BRT_END_SST.as_u16() => break,
            k if k == BRT_BEGIN_SST.as_u16() => {
                let mut reader = RecordCursor::new(body);
                let total = reader.read_u32()?;
                let unique = reader.read_u32()?;
                if !reader.is_empty() {
                    return Err(XlsByeError::Biff12(
                        "BrtBeginSst payload has trailing bytes".to_string(),
                    ));
                }
                header = Some((total, unique));
            }
            k if k == BRT_SST_ITEM.as_u16() => {
                if header.is_none() {
                    return Err(XlsByeError::Biff12(
                        "BrtSstItem encountered before BrtBeginSst".to_string(),
                    ));
                }
                table.push(parse_sst_item(body)?);
            }
            // Any other record type is not part of the table and is ignored.
            _ => {}
        }
    }

    let (total_ref_count, unique_count) = header.ok_or_else(|| {
        XlsByeError::Biff12("missing BrtBeginSst record in sharedStrings.bin".to_string())
    })?;

    let parsed_unique_count = u32::try_from(table.len()).map_err(|_| {
        XlsByeError::Biff12("shared string unique count exceeds u32 range".to_string())
    })?;
    if parsed_unique_count != unique_count {
        return Err(XlsByeError::Biff12(format!(
            "shared string unique count mismatch: BrtBeginSst={unique_count}, parsed={parsed_unique_count}"
        )));
    }

    Ok((table, total_ref_count))
}
/// Decodes one `BrtSstItem` payload into a [`SharedStringEntry`].
///
/// The payload is read as a flags byte followed by a wide string. When bit 0
/// of the flags is clear the item is a plain string. When it is set, a `u32`
/// run count follows, then that many `(u16 start, u16 font index)` pairs;
/// each start is an offset into the string measured in UTF-16 code units.
///
/// The text is split at the run starts. Text before the first run (or the
/// whole text when the run list is empty) is emitted as a run with
/// `font_index: None`, so no characters are dropped. Every declared run
/// becomes a run with `font_index: Some(..)` spanning from its start to the
/// next run's start, or to the end of the string for the last run.
///
/// Only bit 0 of the flags byte is inspected; any payload bytes left over
/// after the string (and runs, if present) are rejected.
///
/// # Errors
///
/// Returns [`XlsByeError::Biff12`] when the payload has trailing bytes, when
/// a run start or end lies beyond the string length, or when run starts are
/// not in ascending order. Cursor read errors are propagated unchanged.
/// Malformed run offsets are always reported as errors and never panic.
fn parse_sst_item(payload: &[u8]) -> Result<SharedStringEntry> {
    // Bytes occupied by one run in the payload: two u16 fields.
    const RUN_WIRE_SIZE: usize = 4;

    let mut cursor = RecordCursor::new(payload);
    let flags = cursor.read_u8()?;
    let text = cursor.read_wide_string()?;
    let has_rich_text = (flags & 0x01) != 0;
    if !has_rich_text {
        if !cursor.is_empty() {
            return Err(XlsByeError::Biff12(
                "BrtSstItem plain string payload has trailing bytes".to_string(),
            ));
        }
        return Ok(SharedStringEntry::Plain(text));
    }

    let run_count = usize::try_from(cursor.read_u32()?)
        .map_err(|_| XlsByeError::Biff12("rich text run count out of range".to_string()))?;
    // The count comes straight from the file. Pre-allocate no more than the
    // payload could physically contain so a bogus count cannot trigger a huge
    // allocation; an overstated count still fails in the read loop below.
    let mut runs = Vec::with_capacity(run_count.min(payload.len() / RUN_WIRE_SIZE));
    for _ in 0..run_count {
        let start_index = usize::from(cursor.read_u16()?);
        let font_index = cursor.read_u16()?;
        runs.push((start_index, font_index));
    }
    if !cursor.is_empty() {
        return Err(XlsByeError::Biff12(
            "BrtSstItem rich string payload has trailing bytes".to_string(),
        ));
    }

    let utf16 = text.encode_utf16().collect::<Vec<_>>();
    let total_units = utf16.len();

    if runs.is_empty() {
        // No formatting runs: keep the text as a single unformatted run
        // instead of returning an empty run list and losing it.
        let mut rich_runs = Vec::new();
        if total_units > 0 {
            rich_runs.push(RichTextRun {
                font_index: None,
                text,
            });
        }
        return Ok(SharedStringEntry::Rich(rich_runs));
    }

    let mut rich_runs = Vec::with_capacity(runs.len() + 1);
    for (index, &(start, font_index)) in runs.iter().enumerate() {
        if start > total_units {
            return Err(XlsByeError::Biff12(format!(
                "rich text run start index {} is out of bounds for string length {}",
                start, total_units
            )));
        }
        let end = runs
            .get(index + 1)
            .map_or(total_units, |&(next_start, _)| next_start);
        if start > end {
            return Err(XlsByeError::Biff12(
                "rich text run start indices are not in ascending order".to_string(),
            ));
        }
        if end > total_units {
            return Err(XlsByeError::Biff12(format!(
                "rich text run end index {} is out of bounds for string length {}",
                end, total_units
            )));
        }
        // Emit the unformatted prefix only after the first start has been
        // bounds-checked above; slicing before the check would panic on a
        // start index larger than the string.
        if index == 0 && start > 0 {
            rich_runs.push(RichTextRun {
                font_index: None,
                text: String::from_utf16_lossy(&utf16[..start]),
            });
        }
        rich_runs.push(RichTextRun {
            font_index: Some(u32::from(font_index)),
            text: String::from_utf16_lossy(&utf16[start..end]),
        });
    }
    Ok(SharedStringEntry::Rich(rich_runs))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes `value` as a variable-length integer: seven payload bits per
    /// byte, least-significant group first, with 0x80 set on every byte
    /// except the last.
    fn encode_varint(value: u32) -> Vec<u8> {
        let mut bytes = Vec::new();
        let mut rest = value;
        while rest >= 0x80 {
            bytes.push((rest & 0x7F) as u8 | 0x80);
            rest >>= 7;
        }
        bytes.push(rest as u8);
        bytes
    }

    /// Frames `payload` as a record: varint type, varint length, raw bytes.
    fn encode_record(record_type: u16, payload: &[u8]) -> Vec<u8> {
        let mut framed = encode_varint(u32::from(record_type));
        framed.extend(encode_varint(payload.len() as u32));
        framed.extend_from_slice(payload);
        framed
    }

    /// Encodes `value` as a little-endian `u32` code-unit count followed by
    /// its UTF-16 code units, each little-endian.
    fn encode_wide_string(value: &str) -> Vec<u8> {
        let units: Vec<u16> = value.encode_utf16().collect();
        let mut bytes = (units.len() as u32).to_le_bytes().to_vec();
        bytes.extend(units.iter().flat_map(|unit| unit.to_le_bytes()));
        bytes
    }

    /// Builds a plain (flags = 0) shared-string item payload.
    fn plain_item(text: &str) -> Vec<u8> {
        let mut item = vec![0u8];
        item.extend(encode_wide_string(text));
        item
    }

    /// Builds a rich (flags = 1) shared-string item payload with the given
    /// `(start, font index)` runs.
    fn rich_item(text: &str, runs: &[(u16, u16)]) -> Vec<u8> {
        let mut item = vec![0x01u8];
        item.extend(encode_wide_string(text));
        item.extend((runs.len() as u32).to_le_bytes());
        for &(start, font) in runs {
            item.extend(start.to_le_bytes());
            item.extend(font.to_le_bytes());
        }
        item
    }

    /// Assembles a full stream: begin record, one record per item, end record.
    fn sst_stream(total_refs: u32, unique: u32, items: &[Vec<u8>]) -> Vec<u8> {
        let mut begin_payload = total_refs.to_le_bytes().to_vec();
        begin_payload.extend(unique.to_le_bytes());

        let mut data = encode_record(BRT_BEGIN_SST.as_u16(), &begin_payload);
        for item in items {
            data.extend(encode_record(BRT_SST_ITEM.as_u16(), item));
        }
        data.extend(encode_record(BRT_END_SST.as_u16(), &[]));
        data
    }

    #[test]
    fn parses_plain_shared_strings() {
        let data = sst_stream(3, 2, &[plain_item("hello"), plain_item("world")]);

        let (sst, total_ref_count) = parse_shared_strings(&data).expect("parse should succeed");

        assert_eq!(total_ref_count, 3);
        assert_eq!(
            sst,
            vec![
                SharedStringEntry::Plain("hello".to_string()),
                SharedStringEntry::Plain("world".to_string())
            ]
        );
    }

    #[test]
    fn parses_rich_text_shared_string_runs() {
        let data = sst_stream(1, 1, &[rich_item("HelloWorld", &[(0, 2), (5, 7)])]);

        let (sst, total_ref_count) = parse_shared_strings(&data).expect("parse should succeed");

        assert_eq!(total_ref_count, 1);
        assert_eq!(
            sst,
            vec![SharedStringEntry::Rich(vec![
                RichTextRun {
                    font_index: Some(2),
                    text: "Hello".to_string(),
                },
                RichTextRun {
                    font_index: Some(7),
                    text: "World".to_string(),
                }
            ])]
        );
    }

    #[test]
    fn preserves_leading_plain_segment_in_rich_string() {
        let data = sst_stream(1, 1, &[rich_item("PrefixBold ", &[(6, 3)])]);

        let (sst, _) = parse_shared_strings(&data).expect("parse should succeed");

        assert_eq!(
            sst,
            vec![SharedStringEntry::Rich(vec![
                RichTextRun {
                    font_index: None,
                    text: "Prefix".to_string(),
                },
                RichTextRun {
                    font_index: Some(3),
                    text: "Bold ".to_string(),
                },
            ])]
        );
    }
}