use std::collections::HashSet;
use xlsbye_core::error::Result;
use xlsbye_core::types::ParsedExternalLink;
use crate::record::header::RecordIter;
use crate::record::ids::{BRT_BEGIN_EXTERN_SHEET, BRT_BEGIN_SUP_BOOK};
use crate::strings::{decode_short_string, decode_short_string_u16, decode_wide_string};
/// Parses the record stream of an external-link part into a [`ParsedExternalLink`].
///
/// Every record produced by `RecordIter::new(data)` is inspected:
///
/// * `BRT_BEGIN_SUP_BOOK` records supply the relationship id. The first such
///   record that yields an `rId<digits>` candidate wins; later ones are ignored.
/// * `BRT_BEGIN_EXTERN_SHEET` records supply sheet names. Every non-empty string
///   candidate that is not itself shaped like a relationship id is kept, with
///   duplicates dropped and first-seen order preserved.
///
/// All other record types are skipped.
///
/// # Errors
///
/// Returns the first error yielded by the record iterator.
pub fn parse_external_link(data: &[u8]) -> Result<ParsedExternalLink> {
    let mut rel_id = None;
    let mut ordered_names = Vec::new();
    // Tracks names already emitted so `ordered_names` stays duplicate-free.
    let mut known_names = HashSet::new();

    for entry in RecordIter::new(data) {
        let (kind, body) = entry?;

        if kind == BRT_BEGIN_SUP_BOOK.as_u16() {
            // Only probe this record while no relationship id has been found yet.
            rel_id = rel_id.or_else(|| extract_rel_id(body));
        } else if kind == BRT_BEGIN_EXTERN_SHEET.as_u16() {
            let fresh_names = extract_string_candidates(body)
                .into_iter()
                .filter(|name| !name.is_empty() && !is_rid(name))
                // `insert` returns false for names that were already recorded.
                .filter(|name| known_names.insert(name.clone()));
            ordered_names.extend(fresh_names);
        }
    }

    Ok(ParsedExternalLink {
        rel_id,
        sheet_names: ordered_names,
    })
}
/// Returns the first string candidate in `payload` that looks like a
/// relationship id (`rId` followed by digits), or `None` if there is none.
fn extract_rel_id(payload: &[u8]) -> Option<String> {
    for candidate in extract_string_candidates(payload) {
        if is_rid(&candidate) {
            return Some(candidate);
        }
    }
    None
}
/// Reports whether `value` is exactly the prefix `rId` followed by one or more
/// ASCII digits (for example `rId7`). The match is case-sensitive.
fn is_rid(value: &str) -> bool {
    match value.strip_prefix("rId") {
        // Reject a bare "rId" and any suffix containing a non-digit byte.
        Some(digits) => !(digits.is_empty() || digits.bytes().any(|b| !b.is_ascii_digit())),
        None => false,
    }
}
/// Collects every plausible string that can be decoded starting at any byte
/// offset of `payload`.
///
/// At each offset the three decoders are tried in a fixed order
/// (`decode_wide_string`, `decode_short_string`, `decode_short_string_u16`).
/// The number of bytes a decoder reports as consumed is ignored, so the scan
/// always advances one byte at a time and overlapping decodes all contribute.
/// Decoded values that are empty or contain a NUL character are discarded.
/// The result may contain duplicates.
fn extract_string_candidates(payload: &[u8]) -> Vec<String> {
    // Pushes a successfully decoded string onto `sink` unless it is empty or
    // contains a NUL character; the consumed-length half of the pair is dropped.
    fn keep_plausible<T>(decoded: Option<(String, T)>, sink: &mut Vec<String>) {
        match decoded {
            Some((text, _)) if !(text.is_empty() || text.contains('\0')) => sink.push(text),
            _ => {}
        }
    }

    let mut found = Vec::new();
    let mut tail = payload;
    // Shrink the window from the front one byte at a time until nothing is left.
    while !tail.is_empty() {
        keep_plausible(decode_wide_string(tail), &mut found);
        keep_plausible(decode_short_string(tail), &mut found);
        keep_plausible(decode_short_string_u16(tail), &mut found);
        tail = &tail[1..];
    }
    found
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes `value` in little-endian base-128: seven payload bits per byte,
    /// with the high bit set on every byte except the last.
    fn encode_varint(mut value: u32) -> Vec<u8> {
        let mut bytes = Vec::new();
        while value >= 0x80 {
            bytes.push((value & 0x7F) as u8 | 0x80);
            value >>= 7;
        }
        // The remaining value fits in seven bits, so no continuation flag.
        bytes.push(value as u8);
        bytes
    }

    /// Frames `payload` as a record: varint type, varint length, then the bytes.
    fn encode_record(record_type: u16, payload: &[u8]) -> Vec<u8> {
        let mut record = encode_varint(u32::from(record_type));
        record.extend(encode_varint(payload.len() as u32));
        record.extend_from_slice(payload);
        record
    }

    /// Encodes `value` as a little-endian `u32` count of UTF-16 code units
    /// followed by those code units in little-endian order.
    fn encode_wide_string(value: &str) -> Vec<u8> {
        let unit_count = value.encode_utf16().count() as u32;
        let mut bytes = unit_count.to_le_bytes().to_vec();
        bytes.extend(value.encode_utf16().flat_map(u16::to_le_bytes));
        bytes
    }

    #[test]
    fn parses_external_link_rel_id_and_sheet_names() {
        // Sup-book payload: a two-byte zero field, then the relationship id.
        let sup_book = [0u16.to_le_bytes().to_vec(), encode_wide_string("rId7")].concat();
        // Extern-sheet payload: a four-byte field holding 5, then the sheet name.
        let extern_sheet = [
            5u32.to_le_bytes().to_vec(),
            encode_wide_string("ExternalSheet"),
        ]
        .concat();

        let data = [
            encode_record(BRT_BEGIN_SUP_BOOK.as_u16(), &sup_book),
            encode_record(BRT_BEGIN_EXTERN_SHEET.as_u16(), &extern_sheet),
        ]
        .concat();

        let parsed = parse_external_link(&data).expect("external link should parse");

        assert_eq!(parsed.rel_id.as_deref(), Some("rId7"));
        assert_eq!(parsed.sheet_names, vec![String::from("ExternalSheet")]);
    }
}