use crate::formula::mtef::constants::*;
use super::objects::*;
use crate::formula::mtef::MtefError;
use zerocopy::{FromBytes, LE, I16, U16};
/// Cursor-based reader over an MTEF equation stream that is preceded by a
/// 28-byte OLE header.
///
/// The public header fields are populated while the parser is constructed
/// (see `new` / `read_mtef_header`).
pub struct MtefBinaryParser<'arena> {
/// Arena handle supplied by the caller. It is stored but not read by the
/// methods visible in this part of the file, hence the `dead_code` allowance.
#[allow(dead_code)]
arena: &'arena bumpalo::Bump,
/// The complete input buffer, OLE header included.
data: &'arena [u8],
/// Offset into `data` of the next byte to be consumed.
pos: usize,
/// MTEF version byte from the stream header. A stored value of 0 is
/// normalised to 5; 1 and 101 are kept as read.
pub mtef_version: u8,
/// Platform byte from the header (forced to 0 or 1 for versions 1 / 101).
pub platform: u8,
/// Product byte from the header (0 when the header does not carry one).
pub product: u8,
/// Product version byte from the header.
pub version: u8,
/// Product sub-version byte from the header.
pub version_sub: u8,
/// Byte read after the NUL-terminated string in a version-5 header; stays 0
/// for every other version.
pub inline: u8,
/// Initialised to `EQN_MODE_DISPLAY` by `new`; not modified by the code
/// visible here. NOTE(review): presumably selects display vs. inline
/// rendering — confirm against the consumers of this field.
pub mode: i32,
}
impl<'arena> MtefBinaryParser<'arena> {
/// Consumes the head of a record and returns its attribute/option flags.
///
/// The record dispatcher leaves the cursor on the record's tag byte, so this
/// method is responsible for stepping over it:
///
/// * Versions below 5 pack tag and flags into a single byte; the flags are
///   the high nibble, returned shifted down into the low four bits.
/// * Otherwise the tag occupies a whole byte and the flags follow in a
///   separate byte. The tag is skipped and the following byte is returned.
///   (Returning the first byte here would hand the tag value back as if it
///   were the flags and leave the real flags byte to be misread as payload.)
///
/// # Errors
/// Returns `MtefError::UnexpectedEof` if the input ends before the flags.
fn get_attribute(&mut self) -> Result<u8, MtefError> {
    if self.mtef_version < 5 {
        let tag_and_flags = self.read_u8()?;
        Ok((tag_and_flags & 0xF0) >> 4)
    } else {
        // Step over the full-byte record tag; the flags live in the next byte.
        let _tag = self.read_u8()?;
        self.read_u8()
    }
}
/// Reads a nudge (x, y) pair.
///
/// Two bytes are always consumed. If both equal 128 they act as an escape
/// marker and the actual offsets follow as two little-endian `i16` values;
/// otherwise the two bytes themselves are returned, widened to `i16`.
///
/// # Errors
/// Returns `MtefError::UnexpectedEof` if the input is exhausted.
fn get_nudge(&mut self) -> Result<(i16, i16), MtefError> {
    let first = self.read_u8()?;
    let second = self.read_u8()?;
    match (first, second) {
        (128, 128) => {
            let dx = self.read_i16()?;
            let dy = self.read_i16()?;
            Ok((dx, dy))
        }
        (dx, dy) => Ok((i16::from(dx), i16::from(dy))),
    }
}
/// Builds a parser over `data`, validating the OLE header and reading the
/// MTEF header that follows it.
///
/// The first 28 bytes must form an OLE header whose little-endian length
/// field (bytes 0..2) is 28 and whose version field (bytes 2..6) is either
/// `0x00020000` or `0x00000200`. Parsing of the MTEF header then starts at
/// offset 28.
///
/// # Errors
/// * `MtefError::InvalidFormat` if the buffer is shorter than 28 bytes or the
///   OLE length/version fields have unexpected values.
/// * Whatever `read_mtef_header` reports for the MTEF header itself.
pub fn new(arena: &'arena bumpalo::Bump, data: &'arena [u8]) -> Result<Self, MtefError> {
    const OLE_HEADER_LEN: usize = 28;

    if data.len() < OLE_HEADER_LEN {
        return Err(MtefError::InvalidFormat(
            "Data too short for OLE header".to_string(),
        ));
    }

    let cb_hdr = u16::from_le_bytes([data[0], data[1]]);
    if cb_hdr != 28 {
        return Err(MtefError::InvalidFormat(format!(
            "Invalid OLE header length: {}",
            cb_hdr
        )));
    }

    let version = u32::from_le_bytes([data[2], data[3], data[4], data[5]]);
    let version_ok = version == 0x00020000 || version == 0x00000200;
    if !version_ok {
        return Err(MtefError::InvalidFormat(format!(
            "Invalid OLE version: 0x{:08X}",
            version
        )));
    }

    // Header fields start zeroed and are filled in by `read_mtef_header`.
    let mut parser = Self {
        arena,
        data,
        pos: OLE_HEADER_LEN,
        mtef_version: 0,
        platform: 0,
        product: 0,
        version: 0,
        version_sub: 0,
        inline: 0,
        mode: EQN_MODE_DISPLAY,
    };
    parser.read_mtef_header()?;
    Ok(parser)
}
/// Reads the MTEF header at the current position and fills in the version
/// fields of the parser.
///
/// At least five bytes must remain. An optional four-byte signature
/// (`28 04 6D 74`) is skipped when present; the next byte is the MTEF
/// version, which selects the rest of the layout:
///
/// * `0` — treated as version 5 with all product fields zero.
/// * `1` / `101` — no further bytes; `platform` is 1 for 101, else 0, and
///   `version` is set to 1.
/// * `2..=4` — four bytes: platform, product, version, sub-version.
/// * `5` — the same four bytes, then a NUL-terminated string that is skipped,
///   then one byte stored in `inline`.
///
/// # Errors
/// * `MtefError::UnexpectedEof` if the input ends early (including a missing
///   NUL terminator in the version-5 string).
/// * `MtefError::InvalidFormat` for any other version byte.
fn read_mtef_header(&mut self) -> Result<(), MtefError> {
    const SIGNATURE: [u8; 4] = [0x28, 0x04, 0x6D, 0x74];

    if self.pos + 5 > self.data.len() {
        return Err(MtefError::UnexpectedEof);
    }

    // The length check above guarantees this slice holds at least five bytes.
    if self.data[self.pos..].starts_with(&SIGNATURE) {
        self.pos += SIGNATURE.len();
    }
    self.mtef_version = self.read_u8()?;

    match self.mtef_version {
        0 => {
            self.mtef_version = 5;
            self.platform = 0;
            self.product = 0;
            self.version = 0;
            self.version_sub = 0;
        }
        legacy @ (1 | 101) => {
            self.platform = u8::from(legacy == 101);
            self.product = 0;
            self.version = 1;
            self.version_sub = 0;
        }
        2..=5 => {
            self.platform = self.read_u8()?;
            self.product = self.read_u8()?;
            self.version = self.read_u8()?;
            self.version_sub = self.read_u8()?;

            if self.mtef_version == 5 {
                // Skip the NUL-terminated string that follows the version bytes.
                let terminator = self.data[self.pos..].iter().position(|&b| b == 0);
                match terminator {
                    Some(offset) => self.pos += offset + 1,
                    None => {
                        self.pos = self.data.len();
                        return Err(MtefError::UnexpectedEof);
                    }
                }
                self.inline = self.read_u8()?;
            }
        }
        _ => {
            return Err(MtefError::InvalidFormat(format!(
                "Unsupported MTEF version: {}",
                self.mtef_version
            )));
        }
    }
    Ok(())
}
/// Parses the equation body and converts it to AST nodes.
///
/// At most two top-level objects are read from the stream. If none could be
/// read an empty vector is returned; otherwise the object list is handed to
/// `convert_objects_to_ast`.
///
/// # Errors
/// Propagates errors from `parse_object_list` and `convert_objects_to_ast`.
pub fn parse(&mut self) -> Result<Vec<crate::formula::ast::MathNode<'arena>>, MtefError> {
    match self.parse_object_list(2)? {
        Some(obj_list) => self.convert_objects_to_ast(&obj_list),
        None => Ok(Vec::new()),
    }
}
/// Reads consecutive records starting at the current position and links the
/// ones that produce an object into a singly linked `MtefObjectList`.
///
/// `num_objs` limits how many objects are appended; `0` means "no limit".
/// Reading stops when any of the following happens:
///
/// * the cursor reaches the end of the input,
/// * an `END` tag is seen (the tag byte is consumed),
/// * `num_objs` objects have been appended,
/// * a record parser reports `MtefError::UnexpectedEof`, or a skip helper
///   reports any error — in both cases the objects collected so far are
///   returned rather than the error (best-effort on truncated input).
///
/// Returns `Ok(None)` if no object was appended.
///
/// # Errors
/// * `MtefError::ParseError` after more than `MAX_ITERATIONS` loop passes.
/// * Any non-EOF error from a record parser.
fn parse_object_list(&mut self, num_objs: usize) -> Result<Option<Box<MtefObjectList>>, MtefError> {
// `head` owns the whole list; `curr` is a raw pointer to its last node so
// that appending does not require walking the list.
let mut head: Option<Box<MtefObjectList>> = None;
let mut curr: Option<*mut MtefObjectList> = None;
let mut tally = 0;
// Only used for the diagnostic message of the iteration guard below.
let start_pos = self.pos;
let mut iterations = 0;
const MAX_ITERATIONS: usize = 10000;
loop {
if self.pos >= self.data.len() {
break;
}
// Guard against runaway loops on malformed input.
iterations += 1;
if iterations > MAX_ITERATIONS {
return Err(MtefError::ParseError(format!(
"Too many objects parsed (>{}), possible infinite loop at position {}",
MAX_ITERATIONS, start_pos
)));
}
// Peek at the record tag without consuming it: version 5 uses the whole
// byte, every other version only the low nibble. The record parsers and
// skip helpers are expected to consume the tag themselves.
let curr_tag = if self.mtef_version == 5 {
self.data[self.pos]
} else {
self.data[self.pos] & 0x0F
};
if curr_tag == crate::formula::mtef::constants::END {
self.pos += 1;
break;
}
// Map the numeric tag to a record type; unrecognised tags become `Future`.
let record_type = match curr_tag {
crate::formula::mtef::constants::LINE => MtefRecordType::Line,
crate::formula::mtef::constants::CHAR => MtefRecordType::Char,
crate::formula::mtef::constants::TMPL => MtefRecordType::Tmpl,
crate::formula::mtef::constants::PILE => MtefRecordType::Pile,
crate::formula::mtef::constants::MATRIX => MtefRecordType::Matrix,
crate::formula::mtef::constants::EMBELL => MtefRecordType::Embell,
crate::formula::mtef::constants::RULER => MtefRecordType::Ruler,
crate::formula::mtef::constants::FONT => MtefRecordType::Font,
crate::formula::mtef::constants::SIZE => MtefRecordType::Size,
crate::formula::mtef::constants::FULL => MtefRecordType::Full,
crate::formula::mtef::constants::SUB => MtefRecordType::Sub,
crate::formula::mtef::constants::SUB2 => MtefRecordType::Sub2,
crate::formula::mtef::constants::SYM => MtefRecordType::Sym,
crate::formula::mtef::constants::SUBSYM => MtefRecordType::SubSym,
crate::formula::mtef::constants::COLOR => MtefRecordType::Color,
crate::formula::mtef::constants::COLOR_DEF => MtefRecordType::ColorDef,
crate::formula::mtef::constants::FONT_DEF => MtefRecordType::FontDef,
crate::formula::mtef::constants::EQN_PREFS => MtefRecordType::EqnPrefs,
crate::formula::mtef::constants::ENCODING_DEF => MtefRecordType::EncodingDef,
_ => MtefRecordType::Future,
};
// Parse the record. `Some(..)` is appended to the list below; `None` means
// the record was skipped. An `UnexpectedEof` from a parser ends the loop
// quietly; every other parser error is propagated.
let obj_ptr: Option<Box<dyn MtefObject>> = match record_type {
MtefRecordType::Char => match self.parse_char() {
Ok(obj) => Some(Box::new(obj)),
Err(MtefError::UnexpectedEof) => break, Err(e) => return Err(e),
},
MtefRecordType::Tmpl => match self.parse_template() {
Ok(obj) => Some(Box::new(obj)),
Err(MtefError::UnexpectedEof) => break,
Err(e) => return Err(e),
},
MtefRecordType::Line => match self.parse_line() {
Ok(obj) => Some(Box::new(obj)),
Err(MtefError::UnexpectedEof) => break,
Err(e) => return Err(e),
},
MtefRecordType::Pile => match self.parse_pile() {
Ok(obj) => Some(Box::new(obj)),
Err(MtefError::UnexpectedEof) => break,
Err(e) => return Err(e),
},
MtefRecordType::Matrix => match self.parse_matrix() {
Ok(obj) => Some(Box::new(obj)),
Err(MtefError::UnexpectedEof) => break,
Err(e) => return Err(e),
},
MtefRecordType::Embell => match self.parse_embell() {
Ok(obj) => Some(Box::new(obj)),
Err(MtefError::UnexpectedEof) => break,
Err(e) => return Err(e),
},
MtefRecordType::Ruler => match self.parse_ruler() {
Ok(obj) => Some(Box::new(obj)),
Err(MtefError::UnexpectedEof) => break,
Err(e) => return Err(e),
},
MtefRecordType::Font => match self.parse_font() {
Ok(obj) => Some(Box::new(obj)),
Err(MtefError::UnexpectedEof) => break,
Err(e) => return Err(e),
},
// All size-like records share one parser, which reads the tag itself.
MtefRecordType::Size | MtefRecordType::Full | MtefRecordType::Sub |
MtefRecordType::Sub2 | MtefRecordType::Sym | MtefRecordType::SubSym => {
match self.parse_size() {
Ok(obj) => Some(Box::new(obj)),
Err(MtefError::UnexpectedEof) => break,
Err(e) => return Err(e),
}
}
// Only the tag byte is stepped over here.
// NOTE(review): a colour definition presumably carries a payload after
// the tag; confirm whether more bytes need to be skipped.
MtefRecordType::ColorDef => {
if self.pos < self.data.len() {
self.pos += 1;
}
None
}
// The skip helpers below end the loop on *any* error, not only EOF.
MtefRecordType::FontDef => {
if self.skip_font_def().is_err() {
break; }
None
}
MtefRecordType::EqnPrefs => {
if self.skip_eqn_prefs().is_err() {
break; }
None
}
MtefRecordType::EncodingDef => {
if self.skip_encoding_def().is_err() {
break; }
None
}
MtefRecordType::Future => {
if self.skip_future_record().is_err() {
break; }
None
}
// Record types without a dedicated arm above (from the mapping in this
// function that is `Color`) are skipped as length-prefixed records.
// NOTE(review): verify that such records really carry a 16-bit length.
_ => {
if self.skip_unknown_record().is_err() {
break; }
None
}
};
if let Some(obj) = obj_ptr {
let new_node = Box::new(MtefObjectList {
tag: record_type,
obj_ptr: obj,
next: None,
});
match curr {
// SAFETY: `curr_ptr` points at the heap allocation of the most recently
// appended node. That node is owned, directly or through the `next`
// chain, by `head`, which is neither dropped nor replaced while the
// pointer is in use, and moving a `Box` does not move its allocation.
Some(curr_ptr) => unsafe {
(*curr_ptr).next = Some(new_node);
curr = (*curr_ptr).next.as_mut().map(|n| n.as_mut() as *mut _);
},
None => {
head = Some(new_node);
curr = head.as_mut().map(|n| n.as_mut() as *mut _);
}
}
// Only appended objects count towards `num_objs`; skipped records do not.
tally += 1;
if num_objs > 0 && tally == num_objs {
break;
}
}
}
Ok(head)
}
/// Parses a character record.
///
/// Layout as read here: attribute flags, an optional nudge (when
/// `CHAR_NUDGE` is set), a typeface byte, then the character code:
///
/// * `mtef_version < 5`: one byte, plus a second (high) byte when
///   `platform == 1`.
/// * otherwise: a 16-bit code unless `CHAR_ENC_NO_MTCODE` is set; an 8-bit
///   value that replaces it when `CHAR_ENC_CHAR_8` is set; and a separate
///   16-bit value stored in `bits16` when `CHAR_ENC_CHAR_16` is set.
///
/// A single embellishment record follows when the embellishment flag is set
/// (`CHAR_EMBELL` for version 5, `XF_EMBELL` for every other version).
///
/// # Errors
/// Propagates errors from the underlying reads.
fn parse_char(&mut self) -> Result<MtefChar, MtefError> {
    let attrs = self.get_attribute()?;

    let (nudge_x, nudge_y) = if attrs & CHAR_NUDGE != 0 {
        self.get_nudge()?
    } else {
        (0, 0)
    };

    let typeface = self.read_u8()?;

    let mut character: u16 = 0;
    let mut bits16: u16 = 0;
    if self.mtef_version >= 5 {
        if attrs & CHAR_ENC_NO_MTCODE == 0 {
            character = self.read_u16()?;
        }
        if attrs & CHAR_ENC_CHAR_8 != 0 {
            character = u16::from(self.read_u8()?);
        }
        if attrs & CHAR_ENC_CHAR_16 != 0 {
            bits16 = self.read_u16()?;
        }
    } else {
        character = u16::from(self.read_u8()?);
        if self.platform == 1 {
            let high = u16::from(self.read_u8()?);
            character |= high << 8;
        }
    }

    // The flag that announces an embellishment differs between version 5 and
    // all other versions.
    let has_embellishment = if self.mtef_version == 5 {
        attrs & CHAR_EMBELL != 0
    } else {
        attrs & crate::formula::mtef::constants::XF_EMBELL != 0
    };
    let embellishment_list = if has_embellishment {
        Some(Box::new(self.parse_embell()?))
    } else {
        None
    };

    Ok(MtefChar {
        nudge_x,
        nudge_y,
        atts: attrs,
        typeface,
        character,
        bits16,
        embellishment_list,
    })
}
/// Parses a template record.
///
/// Reads the attribute flags, an optional nudge (`XF_LMOVE`), the selector
/// byte, the variation, and an options byte. In version 5 the variation may
/// span two bytes: when bit 7 of the first byte is set, its low seven bits
/// are combined with a second byte shifted left by seven.
///
/// Unless `XF_NULL` is set, a nested object list (read until its `END`)
/// follows.
///
/// # Errors
/// Propagates errors from the underlying reads and the nested list.
fn parse_template(&mut self) -> Result<MtefTemplate, MtefError> {
    let attrs = self.get_attribute()?;

    let (nudge_x, nudge_y) = if attrs & XF_LMOVE != 0 {
        self.get_nudge()?
    } else {
        (0, 0)
    };

    let selector = self.read_u8()?;

    let variation_low = self.read_u8()?;
    let variation = if self.mtef_version == 5 && variation_low & 0x80 != 0 {
        let variation_high = self.read_u8()?;
        u16::from(variation_low & 0x7F) | (u16::from(variation_high) << 7)
    } else {
        u16::from(variation_low)
    };

    let options = self.read_u8()?;

    let subobject_list = if attrs & XF_NULL == 0 {
        self.parse_object_list(0)?
    } else {
        None
    };

    Ok(MtefTemplate {
        nudge_x,
        nudge_y,
        selector,
        variation,
        options,
        subobject_list,
    })
}
/// Parses a line record.
///
/// Reads the attribute flags, then — each only when its flag is set — a
/// nudge (`XF_LMOVE`), a one-byte line spacing (`XF_LSPACE`, defaulting to 0)
/// and a ruler (`XF_RULER`). The line's contents follow as an object list
/// read until its `END`.
///
/// NOTE(review): the object list is read unconditionally; `XF_NULL` is not
/// consulted here (unlike in `parse_template`). Confirm that this is intended
/// for placeholder lines.
///
/// # Errors
/// Propagates errors from the underlying reads and the nested list.
fn parse_line(&mut self) -> Result<MtefLine, MtefError> {
    let attrs = self.get_attribute()?;

    let (nudge_x, nudge_y) = if attrs & XF_LMOVE != 0 {
        self.get_nudge()?
    } else {
        (0, 0)
    };

    let mut line_spacing = 0;
    if attrs & XF_LSPACE != 0 {
        line_spacing = self.read_u8()?;
    }

    let mut ruler = None;
    if attrs & XF_RULER != 0 {
        ruler = Some(Box::new(self.parse_ruler()?));
    }

    let object_list = self.parse_object_list(0)?;

    Ok(MtefLine {
        nudge_x,
        nudge_y,
        line_spacing,
        ruler,
        object_list,
    })
}
/// Parses a pile record.
///
/// Reads the attribute flags, an optional nudge (`XF_LMOVE`), the horizontal
/// and vertical alignment bytes, an optional ruler (`XF_RULER`), and finally
/// the contained object list, read until its `END`.
///
/// # Errors
/// Propagates errors from the underlying reads and the nested list.
fn parse_pile(&mut self) -> Result<MtefPile, MtefError> {
    let attrs = self.get_attribute()?;

    let (nudge_x, nudge_y) = if attrs & XF_LMOVE != 0 {
        self.get_nudge()?
    } else {
        (0, 0)
    };

    let halign = self.read_u8()?;
    let valign = self.read_u8()?;

    let mut ruler = None;
    if attrs & XF_RULER != 0 {
        ruler = Some(Box::new(self.parse_ruler()?));
    }

    let line_list = self.parse_object_list(0)?;

    Ok(MtefPile {
        nudge_x,
        nudge_y,
        halign,
        valign,
        ruler,
        line_list,
    })
}
/// Parses a matrix record.
///
/// Reads the attribute flags, an optional nudge (`XF_LMOVE`), then five
/// bytes: vertical alignment, horizontal justification, vertical
/// justification, row count and column count. Two partition-line arrays
/// follow, each using two bits per line with `count + 1` lines, i.e.
/// `ceil(2 * (count + 1) / 8)` bytes. The elements follow as an object list
/// read until its `END`.
///
/// The partition arrays kept in the result hold at most 16 bytes each. Any
/// further partition bytes in the stream are still consumed (and discarded)
/// so that the cursor stays aligned with the element list that follows;
/// leaving them unread would make them be interpreted as records.
///
/// # Errors
/// Propagates errors from the underlying reads and the nested list.
fn parse_matrix(&mut self) -> Result<MtefMatrix, MtefError> {
    let attrs = self.get_attribute()?;

    let (nudge_x, nudge_y) = if attrs & XF_LMOVE != 0 {
        self.get_nudge()?
    } else {
        (0, 0)
    };

    let valign = self.read_u8()?;
    let h_just = self.read_u8()?;
    let v_just = self.read_u8()?;
    let rows = self.read_u8()?;
    let cols = self.read_u8()?;

    let mut row_parts = [0u8; 16];
    let mut col_parts = [0u8; 16];

    let row_bytes = (2 * (rows as usize + 1)).div_ceil(8);
    for i in 0..row_bytes {
        // Always consume the byte; only keep it if there is room for it.
        let byte = self.read_u8()?;
        if let Some(slot) = row_parts.get_mut(i) {
            *slot = byte;
        }
    }

    let col_bytes = (2 * (cols as usize + 1)).div_ceil(8);
    for i in 0..col_bytes {
        let byte = self.read_u8()?;
        if let Some(slot) = col_parts.get_mut(i) {
            *slot = byte;
        }
    }

    let element_list = self.parse_object_list(0)?;

    Ok(MtefMatrix {
        nudge_x,
        nudge_y,
        valign,
        h_just,
        v_just,
        rows,
        cols,
        row_parts,
        col_parts,
        element_list,
    })
}
/// Parses a single embellishment record.
///
/// Reads the attribute flags, an optional nudge (`XF_LMOVE`) and the
/// embellishment byte. The `next` link of the returned value is always
/// `None`; no chaining is performed here.
///
/// # Errors
/// Propagates errors from the underlying reads.
fn parse_embell(&mut self) -> Result<MtefEmbell, MtefError> {
    let attrs = self.get_attribute()?;

    let (nudge_x, nudge_y) = if attrs & XF_LMOVE != 0 {
        self.get_nudge()?
    } else {
        (0, 0)
    };

    let embell = self.read_u8()?;

    Ok(MtefEmbell {
        nudge_x,
        nudge_y,
        embell,
        next: None,
    })
}
/// Parses a ruler: a count byte followed by that many tab stops, each made
/// of a one-byte type and a little-endian 16-bit offset.
///
/// If the byte at the cursor is a `RULER` tag (whole byte for version 5, low
/// nibble otherwise) it is consumed first; if it is anything else the cursor
/// is left where it is and the byte is taken as the count.
///
/// The tag is peeked with a bounds-checked lookup, so calling this with the
/// cursor at or past the end of the input yields `UnexpectedEof` instead of
/// an out-of-bounds panic.
///
/// # Errors
/// Returns `MtefError::UnexpectedEof` if the input ends before the ruler is
/// complete.
fn parse_ruler(&mut self) -> Result<MtefRuler, MtefError> {
    let tag_mask: u8 = if self.mtef_version == 5 { 0xFF } else { 0x0F };
    if let Some(&byte) = self.data.get(self.pos) {
        if (byte & tag_mask) == crate::formula::mtef::constants::RULER {
            self.pos += 1;
        }
    }

    let n_stops = self.read_u8()? as i16;

    // Read every stop first, then link them back-to-front so that the list
    // keeps stream order without needing a raw tail pointer.
    let mut stops: Vec<(i16, i16)> = Vec::with_capacity(n_stops as usize);
    for _ in 0..n_stops {
        let kind = self.read_u8()? as i16;
        let offset = self.read_i16()?;
        stops.push((kind, offset));
    }

    let tabstop_list: Option<Box<MtefTabstop>> =
        stops.into_iter().rev().fold(None, |next, (kind, offset)| {
            Some(Box::new(MtefTabstop {
                r#type: kind,
                offset,
                next,
            }))
        });

    Ok(MtefRuler {
        n_stops,
        tabstop_list,
    })
}
/// Parses a font record: tag byte, typeface byte, style byte and a
/// NUL-terminated UTF-8 font name.
///
/// The record dispatcher leaves the cursor on the tag byte, so the tag is
/// consumed here first; reading it as the typeface would shift every field
/// of the record by one byte.
///
/// NOTE(review): the NUL-terminated name is read for every MTEF version;
/// confirm that version-5 font-style records carry a name as well.
///
/// # Errors
/// * `MtefError::UnexpectedEof` if the input ends before the terminating NUL.
/// * `MtefError::ParseError` if the name is not valid UTF-8.
fn parse_font(&mut self) -> Result<MtefFont, MtefError> {
    // Step over the record tag the dispatcher only peeked at.
    let _tag = self.read_u8()?;

    let tface = i32::from(self.read_u8()?);
    let style = i32::from(self.read_u8()?);

    // After a successful read the cursor is at most `data.len()`, so this
    // slice is always in bounds (possibly empty).
    let rest = &self.data[self.pos..];
    let Some(name_len) = rest.iter().position(|&b| b == 0) else {
        self.pos = self.data.len();
        return Err(MtefError::UnexpectedEof);
    };

    let font_name = std::str::from_utf8(&rest[..name_len])
        .map_err(|_| MtefError::ParseError("Invalid font name encoding".to_string()))?
        .to_string();

    // Skip the name and its terminator.
    self.pos += name_len + 1;

    Ok(MtefFont {
        tface,
        style,
        zname: font_name,
    })
}
/// Parses a size record, consuming its tag byte itself.
///
/// The low nibble of the first byte is the tag. Tags in `FULL..=SUBSYM` are
/// complete on their own: `type` is the tag and `lsize` is `tag - FULL`.
/// Otherwise an option byte follows and selects the layout:
///
/// * `100` — one `lsize` byte, then a 16-bit little-endian `dsize`.
/// * `101` — a 16-bit little-endian `lsize`; `dsize` is 0.
/// * anything else — the option byte itself is `lsize`, and the next byte
///   minus 128 is `dsize`; `type` is 0.
///
/// # Errors
/// Returns `MtefError::UnexpectedEof` if the input is exhausted.
fn parse_size(&mut self) -> Result<MtefSize, MtefError> {
    let tag = self.read_u8()? & 0x0F;
    if tag >= FULL && tag <= SUBSYM {
        return Ok(MtefSize {
            r#type: i32::from(tag),
            lsize: i32::from(tag - FULL),
            dsize: 0,
        });
    }

    let option = self.read_u8()?;
    let size = match option {
        100 => {
            let lsize = i32::from(self.read_u8()?);
            let low = i32::from(self.read_u8()?);
            let high = i32::from(self.read_u8()?);
            MtefSize {
                r#type: i32::from(option),
                lsize,
                dsize: low + (high << 8),
            }
        }
        101 => {
            let low = i32::from(self.read_u8()?);
            let high = i32::from(self.read_u8()?);
            MtefSize {
                r#type: i32::from(option),
                lsize: low + (high << 8),
                dsize: 0,
            }
        }
        _ => {
            let biased = i32::from(self.read_u8()?);
            MtefSize {
                r#type: 0,
                lsize: i32::from(option),
                dsize: biased - 128,
            }
        }
    };
    Ok(size)
}
/// Skips a font-definition record: the tag byte, one further byte, and a
/// NUL-terminated string including its terminator.
///
/// If no NUL is found the cursor ends up one past the end of the input; no
/// error is raised for that case.
///
/// # Errors
/// Returns `MtefError::UnexpectedEof` if the byte after the tag is missing.
fn skip_font_def(&mut self) -> Result<(), MtefError> {
    // Tag byte.
    self.pos += 1;
    self.read_u8()?;

    let name_len = self
        .data
        .get(self.pos..)
        .map_or(0, |rest| rest.iter().take_while(|&&b| b != 0).count());

    // The string plus its terminator.
    self.pos += name_len + 1;
    Ok(())
}
/// Skips an equation-preferences record.
///
/// Layout as consumed here: tag byte, options byte, a counted size array, a
/// counted spacing array, and a counted style list in which every non-zero
/// entry is followed by one extra byte.
///
/// `skip_nibbles` advances the cursor past the array on its own, so its
/// return value must not be added to the cursor again — doing so would skip
/// each array twice.
///
/// NOTE(review): `skip_nibbles` treats each array entry as a single nibble;
/// confirm against the MTEF dimension-array encoding.
///
/// # Errors
/// Returns `MtefError::UnexpectedEof` if the input is exhausted.
fn skip_eqn_prefs(&mut self) -> Result<(), MtefError> {
    // Tag byte.
    self.pos += 1;
    let _options = self.read_u8()?;

    let size_count = self.read_u8()? as usize;
    let _consumed = self.skip_nibbles(size_count)?;

    let space_count = self.read_u8()? as usize;
    let _consumed = self.skip_nibbles(space_count)?;

    let style_count = self.read_u8()? as usize;
    for _ in 0..style_count {
        let c = self.read_u8()?;
        if c != 0 {
            // A non-zero entry carries one additional byte.
            self.pos += 1;
        }
    }
    Ok(())
}
/// Skips an encoding-definition record: the tag byte followed by a
/// NUL-terminated string including its terminator.
///
/// This never fails; if no NUL is found the cursor ends up past the end of
/// the input.
fn skip_encoding_def(&mut self) -> Result<(), MtefError> {
    // Tag byte.
    self.pos += 1;

    let name_len = self
        .data
        .get(self.pos..)
        .map_or(0, |rest| rest.iter().take_while(|&&b| b != 0).count());

    // The string plus its terminator.
    self.pos += name_len + 1;
    Ok(())
}
/// Skips a record with an unrecognised tag: the tag byte, a little-endian
/// 16-bit length, and that many payload bytes.
///
/// The payload length is not checked against the remaining input, so the
/// cursor may end up past the end of the buffer.
///
/// # Errors
/// Returns `MtefError::UnexpectedEof` if the length field is incomplete.
fn skip_future_record(&mut self) -> Result<(), MtefError> {
    // Tag byte.
    self.pos += 1;
    let payload_len = usize::from(self.read_u16()?);
    self.pos += payload_len;
    Ok(())
}
/// Skips a record that has no dedicated parser, treating it as
/// length-prefixed: the tag byte, a little-endian 16-bit length, and that
/// many payload bytes.
///
/// The payload length is not checked against the remaining input.
///
/// # Errors
/// Returns `MtefError::UnexpectedEof` if the length field is incomplete.
fn skip_unknown_record(&mut self) -> Result<(), MtefError> {
    // Tag byte.
    self.pos += 1;
    self.read_u16().map(|len| {
        self.pos += usize::from(len);
    })
}
/// Consumes `ceil(count / 2)` bytes — enough to hold `count` four-bit
/// values — and returns how many bytes that was.
///
/// The cursor has already been advanced past those bytes when this returns.
///
/// # Errors
/// Returns `MtefError::UnexpectedEof` if the input ends first.
fn skip_nibbles(&mut self, count: usize) -> Result<usize, MtefError> {
    let byte_len = count.div_ceil(2);
    (0..byte_len).try_for_each(|_| self.read_u8().map(drop))?;
    Ok(byte_len)
}
/// Reads one byte at the cursor and advances past it.
///
/// # Errors
/// Returns `MtefError::UnexpectedEof` if the cursor is at or beyond the end
/// of the input; the cursor is left unchanged in that case.
#[inline]
fn read_u8(&mut self) -> Result<u8, MtefError> {
    let byte = *self.data.get(self.pos).ok_or(MtefError::UnexpectedEof)?;
    self.pos += 1;
    Ok(byte)
}
/// Reads a little-endian `i16` at the cursor and advances by two bytes.
///
/// # Errors
/// * `MtefError::UnexpectedEof` if fewer than two bytes remain; the cursor is
///   left unchanged.
/// * `MtefError::InvalidFormat` if decoding the two bytes fails.
#[inline]
fn read_i16(&mut self) -> Result<i16, MtefError> {
    let end = self.pos + 2;
    let raw = self
        .data
        .get(self.pos..end)
        .ok_or(MtefError::UnexpectedEof)?;
    let value = I16::<LE>::read_from_bytes(raw)
        .map_err(|_| MtefError::InvalidFormat("Failed to read i16".to_string()))?
        .get();
    self.pos = end;
    Ok(value)
}
/// Reads a little-endian `u16` at the cursor and advances by two bytes.
///
/// # Errors
/// * `MtefError::UnexpectedEof` if fewer than two bytes remain; the cursor is
///   left unchanged.
/// * `MtefError::InvalidFormat` if decoding the two bytes fails.
#[inline]
fn read_u16(&mut self) -> Result<u16, MtefError> {
    let end = self.pos + 2;
    let raw = self
        .data
        .get(self.pos..end)
        .ok_or(MtefError::UnexpectedEof)?;
    let value = U16::<LE>::read_from_bytes(raw)
        .map_err(|_| MtefError::InvalidFormat("Failed to read u16".to_string()))?
        .get();
    self.pos = end;
    Ok(value)
}
}