use xlsbye_core::error::{Result, XlsByeError};
use xlsbye_core::types::{CellRef, Comment, ParsedComments, RichTextRun};
use crate::record::cursor::RecordCursor;
use crate::record::header::RecordIter;
use crate::record::ids::{BRT_BEGIN_COMMENT, BRT_COMMENT_AUTHOR};
// Raw record type ids that `parse_comments` matches in addition to the `BRT_*` ids
// imported from `crate::record::ids`.
// NOTE(review): the values look like MS-XLSB's BrtCommentAuthor / BrtBeginComment /
// BrtCommentText record numbers — confirm, and consider moving them into `record::ids`.

/// Author-name record id; `parse_comments` treats it the same as `BRT_COMMENT_AUTHOR`.
const BRT_REAL_COMMENT_AUTHOR: u16 = 0x0278;
/// Id of the record decoded by `parse_real_comment_anchor` (author index and cell range);
/// the comment it opens is only emitted once a text record follows.
const BRT_REAL_COMMENT_ANCHOR: u16 = 0x027B;
/// Id of the record decoded by `parse_real_comment_text`; supplies the text of the
/// anchor that is currently pending, if any.
const BRT_REAL_COMMENT_TEXT: u16 = 0x027D;
/// Scans a comments record stream and gathers author names and cell comments.
///
/// Records are handled in stream order:
/// * an author record (either id) with a non-empty payload contributes one name;
/// * a `BRT_BEGIN_COMMENT` record with a non-empty payload yields a complete comment;
/// * an anchor record opens a comment that is held back until a text record arrives,
///   replacing any anchor that was still waiting;
/// * a text record completes the waiting anchor, or is skipped when none is waiting;
/// * every other record is skipped.
///
/// # Errors
///
/// Propagates any error produced by the record iterator or while decoding a matched
/// record.
pub fn parse_comments(data: &[u8]) -> Result<ParsedComments> {
    let mut authors = Vec::new();
    let mut comments = Vec::new();
    // Anchor that has been decoded but has not yet received its text record.
    let mut awaiting_text: Option<Comment> = None;

    for entry in RecordIter::new(data) {
        let (kind, body) = entry?;
        let has_body = !body.is_empty();

        // Arms are tried top to bottom, so the precedence between ids is explicit.
        match kind {
            id if has_body
                && (id == BRT_COMMENT_AUTHOR.as_u16() || id == BRT_REAL_COMMENT_AUTHOR) =>
            {
                let mut reader = RecordCursor::new(body);
                authors.push(reader.read_wide_string()?);
            }
            id if has_body && id == BRT_BEGIN_COMMENT.as_u16() => {
                comments.push(parse_comment(body)?);
            }
            BRT_REAL_COMMENT_ANCHOR => {
                awaiting_text = Some(parse_real_comment_anchor(body)?);
            }
            BRT_REAL_COMMENT_TEXT => {
                // The text payload is only decoded when an anchor is waiting for it.
                if let Some(mut anchored) = awaiting_text.take() {
                    anchored.text = parse_real_comment_text(body)?;
                    comments.push(anchored);
                }
            }
            _ => {}
        }
    }

    Ok(ParsedComments { authors, comments })
}
/// Decodes the leading fields of a comment anchor record into a text-less [`Comment`].
///
/// Five consecutive `u32` values are consumed: the author index, first row, last row,
/// first column and last column. Only the first row and first column are kept, each
/// incremented by one through `one_based`; any bytes after the fifth value are left
/// unread. The returned comment has an empty `text` for the caller to fill in.
///
/// # Errors
///
/// Propagates cursor read errors, and fails when a row or column cannot be incremented
/// without overflowing `u32`.
fn parse_real_comment_anchor(payload: &[u8]) -> Result<Comment> {
    let mut reader = RecordCursor::new(payload);
    let author_index = reader.read_u32()?;

    let first_row = reader.read_u32()?;
    let row = one_based(first_row, "comment row")?;
    // Last row of the range: read only to advance the cursor.
    reader.read_u32()?;

    let first_col = reader.read_u32()?;
    let col = one_based(first_col, "comment column")?;
    // Last column of the range: read only to advance the cursor.
    reader.read_u32()?;

    let cell_ref = CellRef { row, col };
    Ok(Comment {
        cell_ref,
        author_index,
        text: Vec::new(),
    })
}
fn parse_real_comment_text(payload: &[u8]) -> Result<Vec<RichTextRun>> {
let mut cursor = RecordCursor::new(payload);
let flags = cursor.read_u8()?;
let char_count = usize::try_from(cursor.read_u32()?)
.map_err(|_| XlsByeError::Biff12("comment text length out of range".to_string()))?;
let utf16_bytes = cursor.read_bytes(char_count * 2)?;
let utf16 = utf16_bytes
.chunks_exact(2)
.map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]]))
.collect::<Vec<_>>();
let text = String::from_utf16_lossy(&utf16);
if flags & 0x01 == 0 {
return Ok(vec![RichTextRun {
font_index: None,
text,
}]);
}
let run_count = usize::try_from(cursor.read_u32()?)
.map_err(|_| XlsByeError::Biff12("comment rich text run count out of range".to_string()))?;
let mut runs = Vec::with_capacity(run_count);
for _ in 0..run_count {
let start_index = usize::from(cursor.read_u16()?);
let font_index = cursor.read_u16()?;
runs.push((start_index, font_index));
}
let mut rich_runs = Vec::with_capacity(runs.len() + 1);
if let Some((first_start, _)) = runs.first() {
if *first_start > 0 {
rich_runs.push(RichTextRun {
font_index: None,
text: String::from_utf16_lossy(&utf16[..*first_start]),
});
}
}
for (index, (start, font_index)) in runs.iter().enumerate() {
let end = runs.get(index + 1).map(|(next_start, _)| *next_start).unwrap_or(utf16.len());
rich_runs.push(RichTextRun {
font_index: Some(u32::from(*font_index)),
text: String::from_utf16_lossy(&utf16[*start..end]),
});
}
Ok(rich_runs)
}
/// Decodes a self-contained comment record: author index, cell range and optional text.
///
/// Five consecutive `u32` values are consumed first: the author index, first row, last
/// row, first column and last column. Only the first row and first column are kept,
/// each incremented by one through `one_based`. If any bytes remain afterwards they are
/// read as one wide string and stored as a single run with no font; otherwise the
/// comment's `text` is empty.
///
/// # Errors
///
/// Propagates cursor read errors, and fails when a row or column cannot be incremented
/// without overflowing `u32`.
fn parse_comment(payload: &[u8]) -> Result<Comment> {
    let mut reader = RecordCursor::new(payload);
    let author_index = reader.read_u32()?;

    let first_row = reader.read_u32()?;
    let row = one_based(first_row, "comment row")?;
    // Last row of the range: read only to advance the cursor.
    reader.read_u32()?;

    let first_col = reader.read_u32()?;
    let col = one_based(first_col, "comment column")?;
    // Last column of the range: read only to advance the cursor.
    reader.read_u32()?;

    let mut text = Vec::new();
    if !reader.is_empty() {
        let body = reader.read_wide_string()?;
        text.push(RichTextRun {
            font_index: None,
            text: body,
        });
    }

    let cell_ref = CellRef { row, col };
    Ok(Comment {
        cell_ref,
        author_index,
        text,
    })
}
/// Returns `value + 1`, turning a zero-based index into a one-based one.
///
/// `field` names the quantity being converted and only appears in the error message.
///
/// # Errors
///
/// Returns `XlsByeError::Biff12` when `value` is `u32::MAX`, since the increment would
/// overflow.
fn one_based(value: u32, field: &str) -> Result<u32> {
    match value.checked_add(1) {
        Some(shifted) => Ok(shifted),
        None => Err(XlsByeError::Biff12(format!(
            "{field} value {value} overflows when converting to 1-based index"
        ))),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::record::ids::{
        BRT_BEGIN_COMMENT_AUTHORS, BRT_BEGIN_COMMENT_LIST, BRT_END_COMMENT, BRT_END_COMMENT_LIST,
    };

    /// Encodes `value` in 7-bit groups, least significant group first; every byte except
    /// the last carries the continuation bit `0x80`.
    fn encode_varint(mut value: u32) -> Vec<u8> {
        let mut encoded = Vec::new();
        while value >= 0x80 {
            encoded.push((value & 0x7F) as u8 | 0x80);
            value >>= 7;
        }
        // What is left fits in seven bits, so it becomes the terminating byte.
        encoded.push(value as u8);
        encoded
    }

    /// Frames `payload` as a record: varint type, varint payload length, payload bytes.
    fn encode_record(record_type: u16, payload: &[u8]) -> Vec<u8> {
        let type_bytes = encode_varint(u32::from(record_type));
        let len_bytes = encode_varint(payload.len() as u32);
        [type_bytes.as_slice(), len_bytes.as_slice(), payload].concat()
    }

    /// Serialises `value` as a `u32` count of UTF-16 code units followed by the units
    /// themselves, all little-endian.
    fn encode_wide_string(value: &str) -> Vec<u8> {
        let units: Vec<u16> = value.encode_utf16().collect();
        let mut bytes = Vec::with_capacity(4 + units.len() * 2);
        bytes.extend_from_slice(&(units.len() as u32).to_le_bytes());
        bytes.extend(units.iter().flat_map(|unit| unit.to_le_bytes()));
        bytes
    }

    #[test]
    fn parses_authors_and_comments() {
        // Author 0, rows 0..=0, columns 1..=1, followed by the comment text.
        let mut comment: Vec<u8> = [0u32, 0, 0, 1, 1]
            .iter()
            .flat_map(|field| field.to_le_bytes())
            .collect();
        comment.extend(encode_wide_string("Looks good"));

        let data = [
            encode_record(BRT_BEGIN_COMMENT_AUTHORS.as_u16(), &[]),
            encode_record(BRT_COMMENT_AUTHOR.as_u16(), &encode_wide_string("Alice")),
            encode_record(BRT_BEGIN_COMMENT_LIST.as_u16(), &[]),
            encode_record(BRT_BEGIN_COMMENT.as_u16(), &comment),
            encode_record(BRT_END_COMMENT.as_u16(), &[]),
            encode_record(BRT_END_COMMENT_LIST.as_u16(), &[]),
        ]
        .concat();

        let parsed = parse_comments(&data).expect("comments should parse");

        assert_eq!(parsed.authors, vec!["Alice".to_string()]);
        assert_eq!(parsed.comments.len(), 1);

        let only = &parsed.comments[0];
        // Zero-based row 0 / column 1 in the payload surface as 1 / 2.
        assert_eq!(only.cell_ref.row, 1);
        assert_eq!(only.cell_ref.col, 2);
        assert_eq!(only.author_index, 0);
        assert_eq!(only.text[0].text, "Looks good");
    }
}