use crate::error::{Error, Result};
/// Record tag identifiers, represented by their `u16` numeric value.
///
/// Each named variant's discriminant is the numeric tag it stands for. Any
/// number without a dedicated variant is mapped to [`TagId::Unknown`] by the
/// `From<u16>` conversion.
///
/// NOTE(review): the names and numbering look like the HWP 5.0 record tag
/// table — confirm against the format specification.
#[repr(u16)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TagId {
// Tags 16..=31 (29 has no variant here).
DocumentProperties = 16,
IdMappings = 17,
BinData = 18,
FaceName = 19,
BorderFill = 20,
CharShape = 21,
TabDef = 22,
Numbering = 23,
Bullet = 24,
ParaShape = 25,
Style = 26,
DocData = 27,
DistributeDocData = 28,
CompatibleDocument = 30,
LayoutCompatibility = 31,
// Tags 66..=88 (contiguous).
ParaHeader = 66,
ParaText = 67,
ParaCharShape = 68,
ParaLineSeg = 69,
ParaRangeTag = 70,
CtrlHeader = 71,
ListHeader = 72,
PageDef = 73,
FootnoteShape = 74,
PageBorderFill = 75,
ShapeComponent = 76,
Table = 77,
ShapeComponentLine = 78,
ShapeComponentRectangle = 79,
ShapeComponentEllipse = 80,
ShapeComponentArc = 81,
ShapeComponentPolygon = 82,
ShapeComponentCurve = 83,
ShapeComponentOle = 84,
ShapeComponentPicture = 85,
ShapeComponentContainer = 86,
CtrlData = 87,
EqEdit = 88,
// Catch-all for every value not listed above; 0xFFFF itself also maps here.
Unknown = 0xFFFF,
}
impl From<u16> for TagId {
fn from(value: u16) -> Self {
match value {
16 => TagId::DocumentProperties,
17 => TagId::IdMappings,
18 => TagId::BinData,
19 => TagId::FaceName,
20 => TagId::BorderFill,
21 => TagId::CharShape,
22 => TagId::TabDef,
23 => TagId::Numbering,
24 => TagId::Bullet,
25 => TagId::ParaShape,
26 => TagId::Style,
27 => TagId::DocData,
28 => TagId::DistributeDocData,
30 => TagId::CompatibleDocument,
31 => TagId::LayoutCompatibility,
66 => TagId::ParaHeader,
67 => TagId::ParaText,
68 => TagId::ParaCharShape,
69 => TagId::ParaLineSeg,
70 => TagId::ParaRangeTag,
71 => TagId::CtrlHeader,
72 => TagId::ListHeader,
73 => TagId::PageDef,
74 => TagId::FootnoteShape,
75 => TagId::PageBorderFill,
76 => TagId::ShapeComponent,
77 => TagId::Table,
78 => TagId::ShapeComponentLine,
79 => TagId::ShapeComponentRectangle,
80 => TagId::ShapeComponentEllipse,
81 => TagId::ShapeComponentArc,
82 => TagId::ShapeComponentPolygon,
83 => TagId::ShapeComponentCurve,
84 => TagId::ShapeComponentOle,
85 => TagId::ShapeComponentPicture,
86 => TagId::ShapeComponentContainer,
87 => TagId::CtrlData,
88 => TagId::EqEdit,
_ => TagId::Unknown,
}
}
}
/// Decoded record header, as produced by `RecordHeader::parse`.
#[derive(Debug, Clone, Copy)]
pub struct RecordHeader {
// Raw numeric tag: the low 10 bits of the little-endian header word.
pub tag_id: u16,
// Level value: bits 10..=19 of the header word.
pub level: u16,
// Payload size. Taken from the top 12 bits of the header word, or from the
// following 32-bit little-endian value when those bits equal the
// extended-size sentinel (0xFFF).
pub size: u32,
}
impl RecordHeader {
pub const SIZE: usize = 4;
pub const EXTENDED_SIZE_SENTINEL: u32 = 0xFFF;
pub fn parse(data: &[u8]) -> Result<(Self, usize)> {
if data.len() < 4 {
return Err(Error::InvalidData("Record header too small".into()));
}
let header_value = u32::from_le_bytes([data[0], data[1], data[2], data[3]]);
let tag_id = (header_value & 0x3FF) as u16;
let level = ((header_value >> 10) & 0x3FF) as u16;
let size_field = (header_value >> 20) & 0xFFF;
let (size, consumed) = if size_field == Self::EXTENDED_SIZE_SENTINEL {
if data.len() < 8 {
return Err(Error::InvalidData(
"Extended record header too small".into(),
));
}
let extended_size = u32::from_le_bytes([data[4], data[5], data[6], data[7]]);
(extended_size, 8)
} else {
(size_field, 4)
};
Ok((
Self {
tag_id,
level,
size,
},
consumed,
))
}
pub fn tag(&self) -> TagId {
TagId::from(self.tag_id)
}
}
/// A single record: its decoded header plus an owned copy of its payload.
#[derive(Debug, Clone)]
pub struct Record {
// Header decoded for this record.
pub header: RecordHeader,
// Payload bytes; `RecordIterator` fills this with a copy of the
// `header.size` bytes that follow the header.
pub data: Vec<u8>,
// Where the record's header started; `RecordIterator` sets this to the byte
// index within the slice it was iterating.
pub offset: u64,
}
impl Record {
    /// Returns the typed tag of this record.
    pub fn tag(&self) -> TagId {
        self.header.tag()
    }

    /// Returns the raw numeric tag of this record.
    pub fn tag_id(&self) -> u16 {
        self.header.tag_id
    }

    /// Returns the level stored in the record header.
    pub fn level(&self) -> u16 {
        self.header.level
    }

    /// Returns the payload size declared in the record header.
    pub fn size(&self) -> u32 {
        self.header.size
    }

    /// Returns the record payload.
    pub fn data(&self) -> &[u8] {
        &self.data
    }

    /// Borrows `len` payload bytes starting at `offset`.
    ///
    /// The end of the range is computed with `checked_add`, so an `offset`
    /// close to `usize::MAX` yields an error instead of overflowing (which
    /// would panic in debug builds, or wrap around and then panic on
    /// indexing in release builds).
    fn bytes_at(&self, offset: usize, len: usize) -> Result<&[u8]> {
        offset
            .checked_add(len)
            .and_then(|end| self.data.get(offset..end))
            .ok_or_else(|| Error::InvalidData("Read past end of record".into()))
    }

    /// Reads the payload byte at `offset`.
    ///
    /// # Errors
    ///
    /// Returns `Error::InvalidData` if `offset` is outside the payload.
    pub fn read_u8(&self, offset: usize) -> Result<u8> {
        self.data
            .get(offset)
            .copied()
            .ok_or_else(|| Error::InvalidData("Read past end of record".into()))
    }

    /// Reads a little-endian `u16` from the payload at `offset`.
    ///
    /// # Errors
    ///
    /// Returns `Error::InvalidData` if the two bytes are not fully inside
    /// the payload.
    pub fn read_u16(&self, offset: usize) -> Result<u16> {
        let b = self.bytes_at(offset, 2)?;
        Ok(u16::from_le_bytes([b[0], b[1]]))
    }

    /// Reads a little-endian `u32` from the payload at `offset`.
    ///
    /// # Errors
    ///
    /// Returns `Error::InvalidData` if the four bytes are not fully inside
    /// the payload.
    pub fn read_u32(&self, offset: usize) -> Result<u32> {
        let b = self.bytes_at(offset, 4)?;
        Ok(u32::from_le_bytes([b[0], b[1], b[2], b[3]]))
    }

    /// Reads a little-endian `i32` from the payload at `offset`.
    ///
    /// # Errors
    ///
    /// Returns `Error::InvalidData` if the four bytes are not fully inside
    /// the payload.
    pub fn read_i32(&self, offset: usize) -> Result<i32> {
        let b = self.bytes_at(offset, 4)?;
        Ok(i32::from_le_bytes([b[0], b[1], b[2], b[3]]))
    }
}
/// Iterator that walks a byte slice and yields one `Result<Record>` per
/// header + payload pair it finds.
pub struct RecordIterator<'a> {
// The full byte slice being iterated.
data: &'a [u8],
// Byte index into `data` where the next header is expected to start.
position: usize,
}
impl<'a> RecordIterator<'a> {
pub fn new(data: &'a [u8]) -> Self {
Self { data, position: 0 }
}
pub fn position(&self) -> usize {
self.position
}
pub fn has_more(&self) -> bool {
self.position + 4 <= self.data.len()
}
}
/// Yields records one after another from the underlying slice.
///
/// Iteration ends with `None` once fewer than four bytes remain. The first
/// error is yielded once and then ends iteration: the cursor is moved to the
/// end of the slice, so later calls return `None` and `has_more()` is
/// `false`.
impl<'a> Iterator for RecordIterator<'a> {
    type Item = Result<Record>;

    fn next(&mut self) -> Option<Self::Item> {
        if !self.has_more() {
            return None;
        }

        let stream_len = self.data.len();
        let start = self.position;
        let offset = start as u64;

        let (header, header_size) = match RecordHeader::parse(&self.data[start..]) {
            Ok(parsed) => parsed,
            Err(e) => {
                // Without this the cursor would stay put and every later
                // call would report the same error forever (e.g. `collect()`
                // would never terminate).
                self.position = stream_len;
                return Some(Err(e));
            }
        };

        let data_start = start + header_size;
        // `checked_add` guards targets where `usize` is 32 bits wide and a
        // large declared size could overflow the end index.
        let data_end = match data_start.checked_add(header.size as usize) {
            Some(end) if end <= stream_len => end,
            _ => {
                // The stream can no longer be trusted past this point;
                // stop instead of decoding payload bytes as headers.
                self.position = stream_len;
                return Some(Err(Error::RecordParse {
                    offset,
                    message: format!(
                        "Record data exceeds stream bounds: {} + {} > {}",
                        data_start, header.size, stream_len
                    ),
                }));
            }
        };

        let data = self.data[data_start..data_end].to_vec();
        self.position = data_end;

        Some(Ok(Record {
            header,
            data,
            offset,
        }))
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Basic header: the size is carried in the 12-bit field.
    #[test]
    fn test_parse_record_header() {
        // Little-endian word 0x00A0_0042: tag 0x042 (66), level 0, size 0x00A (10).
        let bytes = [0x42, 0x00, 0xA0, 0x00];
        let (parsed, used) = RecordHeader::parse(&bytes).unwrap();

        assert_eq!(used, 4);
        assert_eq!(parsed.tag_id, 66);
        assert_eq!(parsed.level, 0);
        assert_eq!(parsed.size, 10);
        assert_eq!(parsed.tag(), TagId::ParaHeader);
    }

    /// Extended header: size field is the 0xFFF sentinel, real size follows.
    #[test]
    fn test_parse_extended_record_header() {
        let bytes = [
            // Word 0xFFF0_0043: tag 0x043 (67), level 0, size field 0xFFF.
            0x43, 0x00, 0xF0, 0xFF,
            // Extended size 0x0000_1388 (5000).
            0x88, 0x13, 0x00, 0x00,
        ];
        let (parsed, used) = RecordHeader::parse(&bytes).unwrap();

        assert_eq!(used, 8);
        assert_eq!(parsed.tag_id, 67);
        assert_eq!(parsed.size, 5000);
        assert_eq!(parsed.tag(), TagId::ParaText);
    }

    /// Two back-to-back records are split at the right boundaries.
    #[test]
    fn test_record_iterator() {
        let stream: Vec<u8> = vec![
            // Record 1: tag 66, size 2.
            0x42, 0x00, 0x20, 0x00, 0xAA, 0xBB,
            // Record 2: tag 67, size 3.
            0x43, 0x00, 0x30, 0x00, 0xCC, 0xDD, 0xEE,
        ];
        let expected: [(u16, TagId, &[u8]); 2] = [
            (66, TagId::ParaHeader, &[0xAA, 0xBB]),
            (67, TagId::ParaText, &[0xCC, 0xDD, 0xEE]),
        ];

        let records: Vec<_> = RecordIterator::new(&stream).collect();
        assert_eq!(records.len(), 2);

        for (parsed, &(tag_id, tag, payload)) in records.iter().zip(expected.iter()) {
            let record = parsed.as_ref().unwrap();
            assert_eq!(record.tag_id(), tag_id);
            assert_eq!(record.tag(), tag);
            assert_eq!(record.data(), payload);
        }
    }
}