use crate::container::Container;
use crate::error::Error;
use crate::record::{
RecordIter, HWPTAG_BIN_DATA, HWPTAG_CHAR_SHAPE, HWPTAG_DISTRIBUTE_DOC_DATA,
HWPTAG_DOCUMENT_PROPERTIES, HWPTAG_FACE_NAME, HWPTAG_PARA_SHAPE, HWPTAG_STYLE,
};
use crate::shape::{parse_char_shape, parse_para_shape, ShapeTables};
/// Collects the shape, face and style tables from the `/DocInfo` stream.
///
/// Records are visited in stream order:
///
/// * `HWPTAG_CHAR_SHAPE` / `HWPTAG_PARA_SHAPE` records are stored under a
///   zero-based ID that counts every record of that tag, whether or not its
///   payload parsed. A record that fails to parse therefore leaves a gap in
///   the map instead of renumbering the records after it.
/// * `HWPTAG_FACE_NAME` / `HWPTAG_STYLE` records are appended in order when
///   they parse and silently skipped when they do not.
/// * Every other tag is ignored.
///
/// Once all records are read, each character shape gets the name of the face
/// found at `face_ids[slot]` copied into `face_names[slot]` for slots `0..7`.
/// Slots whose index has no entry in the face list are left untouched.
///
/// # Errors
///
/// Propagates the error from reading the raw stream and any error yielded by
/// the record iterator. Payload parse failures are not reported.
pub fn read_shape_tables(container: &mut Container) -> Result<ShapeTables, Error> {
    let doc_info = container.read_raw_stream("/DocInfo")?;
    let mut tables = ShapeTables::default();
    let mut char_id = 0u32;
    let mut para_id = 0u32;

    for record in RecordIter::new(&doc_info) {
        let record = record?;
        let payload = record.payload;
        match record.header.tag_id {
            HWPTAG_CHAR_SHAPE => {
                if let Ok(shape) = parse_char_shape(payload) {
                    tables.char_shapes.insert(char_id, shape);
                }
                // Advance even on failure so later records keep their IDs.
                char_id += 1;
            }
            HWPTAG_PARA_SHAPE => {
                if let Ok(shape) = parse_para_shape(payload) {
                    tables.para_shapes.insert(para_id, shape);
                }
                // Advance even on failure so later records keep their IDs.
                para_id += 1;
            }
            HWPTAG_FACE_NAME => {
                // NOTE(review): a skipped face shifts the index of every face
                // after it, unlike the shape maps above; confirm this is intended.
                if let Ok(face) = crate::faces::parse_face_name(payload) {
                    tables.faces.push(face);
                }
            }
            HWPTAG_STYLE => {
                if let Ok(style) = crate::styles::parse_style(payload) {
                    tables.styles.push(style);
                }
            }
            _ => {}
        }
    }

    // Resolve face indices to names now that the full face list is known.
    let faces = &tables.faces;
    for shape in tables.char_shapes.values_mut() {
        for slot in 0..7 {
            let face_index = shape.face_ids[slot] as usize;
            if let Some(face) = faces.get(face_index) {
                shape.face_names[slot] = face.name.clone();
            }
        }
    }

    Ok(tables)
}
pub fn read_asset_catalog(container: &mut Container) -> Result<crate::assets::AssetCatalog, Error> {
let bytes = container.read_raw_stream("/DocInfo")?;
let mut entries = Vec::new();
let mut positional = 1u16;
for rec in RecordIter::new(&bytes) {
let rec = rec?;
if rec.header.tag_id == HWPTAG_BIN_DATA {
if let Ok(mut e) = crate::assets::parse_bin_data(rec.payload, positional) {
if e.kind == "EMBEDDING" {
let stream_name = format!("/BinData/BIN{:04X}.{}", e.id, e.format);
e.size_bytes = container.stream_size(&stream_name);
}
entries.push(e);
}
positional += 1;
}
}
Ok(crate::assets::AssetCatalog { entries })
}
/// Fixed-layout fields decoded from the `HWPTAG_DOCUMENT_PROPERTIES` record
/// of the `/DocInfo` stream.
///
/// `parse` fills each field from a little-endian integer at a fixed offset
/// in the record payload; that offset is noted on every field. The meaning of
/// each value beyond what its name suggests is not established in this file.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, serde::Serialize)]
pub struct DocumentProperties {
/// `u16` at payload offset 0.
pub section_count: u16,
/// `u16` at payload offset 2.
pub page_start: u16,
/// `u16` at payload offset 4.
pub footnote_start: u16,
/// `u16` at payload offset 6.
pub endnote_start: u16,
/// `u16` at payload offset 8.
pub picture_start: u16,
/// `u16` at payload offset 10.
pub table_start: u16,
/// `u16` at payload offset 12.
pub equation_start: u16,
/// `u32` at payload offset 14.
pub caret_section: u32,
/// `u32` at payload offset 18.
pub caret_position: u32,
}
pub fn read(container: &mut Container) -> Result<DocumentProperties, Error> {
let bytes = container.read_raw_stream("/DocInfo")?;
for rec in RecordIter::new(&bytes) {
let rec = rec?;
if rec.header.tag_id == HWPTAG_DOCUMENT_PROPERTIES {
return parse(rec.payload);
}
}
Err(Error::Record(
"DocInfo missing HWPTAG_DOCUMENT_PROPERTIES".into(),
))
}
fn parse(p: &[u8]) -> Result<DocumentProperties, Error> {
if p.len() < 22 {
return Err(Error::Record(format!(
"DocumentProperties too short: {}",
p.len()
)));
}
let u16_at = |o: usize| u16::from_le_bytes(p[o..o + 2].try_into().unwrap());
let u32_at = |o: usize| u32::from_le_bytes(p[o..o + 4].try_into().unwrap());
Ok(DocumentProperties {
section_count: u16_at(0),
page_start: u16_at(2),
footnote_start: u16_at(4),
endnote_start: u16_at(6),
picture_start: u16_at(8),
table_start: u16_at(10),
equation_start: u16_at(12),
caret_section: u32_at(14),
caret_position: u32_at(18),
})
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A payload written field by field decodes to the values put into it.
    #[test]
    fn parses_fixed_layout() {
        // First u16 is 3, followed by six u16 fields that are all 1.
        let mut payload: Vec<u8> = 3u16.to_le_bytes().to_vec();
        for _ in 0..6 {
            payload.extend_from_slice(&1u16.to_le_bytes());
        }
        // The two u32 fields.
        payload.extend_from_slice(&0u32.to_le_bytes());
        payload.extend_from_slice(&42u32.to_le_bytes());
        // Trailing bytes beyond the decoded fields.
        payload.extend_from_slice(&[0u8; 8]);

        let props = parse(&payload).unwrap();
        assert_eq!(props.section_count, 3);
        assert_eq!(props.caret_position, 42);
    }

    /// Ten bytes is below the minimum length that `parse` accepts.
    #[test]
    fn rejects_short_payload() {
        assert!(parse(&[0u8; 10]).is_err());
    }
}
/// Returns a copy of the payload of the first `HWPTAG_DISTRIBUTE_DOC_DATA`
/// record in `doc_info_bytes`.
///
/// Yields `None` when the records run out without a match, and also when the
/// record iterator reports an error before a match is found: scanning stops
/// at the first error rather than skipping past it.
pub fn find_distribute_doc_data(doc_info_bytes: &[u8]) -> Option<Vec<u8>> {
    RecordIter::new(doc_info_bytes)
        // Stop at the first malformed record; everything after it is unseen.
        .map_while(Result::ok)
        .find(|record| record.header.tag_id == HWPTAG_DISTRIBUTE_DOC_DATA)
        .map(|record| record.payload.to_vec())
}
#[cfg(test)]
mod distribute_tests {
    use super::*;
    use crate::record::{HWPTAG_DISTRIBUTE_DOC_DATA, HWPTAG_DOCUMENT_PROPERTIES};

    /// Serialises a record header for test streams.
    ///
    /// The 32-bit little-endian word packs the low 10 bits of `tag` into bits
    /// 0..10, the low 10 bits of `level` into bits 10..20, and `size` clamped
    /// to `0xFFF` into bits 20..32. When `size` is `0xFFF` or larger, the full
    /// size follows as a second little-endian `u32`.
    fn encode_header(tag: u16, level: u16, size: u32) -> Vec<u8> {
        const EXTENDED_SIZE: u32 = 0xFFF;

        let tag_bits = u32::from(tag) & 0x3FF;
        let level_bits = (u32::from(level) & 0x3FF) << 10;
        let size_bits = size.min(EXTENDED_SIZE) << 20;

        let mut header = (tag_bits | level_bits | size_bits).to_le_bytes().to_vec();
        if size >= EXTENDED_SIZE {
            header.extend_from_slice(&size.to_le_bytes());
        }
        header
    }

    #[test]
    fn finds_record_when_present() {
        let payload = vec![0xAAu8; 256];
        let mut stream = encode_header(HWPTAG_DISTRIBUTE_DOC_DATA, 0, 256);
        stream.extend_from_slice(&payload);

        assert_eq!(find_distribute_doc_data(&stream), Some(payload));
    }

    #[test]
    fn returns_none_when_absent() {
        let mut stream = encode_header(HWPTAG_DOCUMENT_PROPERTIES, 0, 30);
        stream.extend_from_slice(&[0u8; 30]);

        assert_eq!(find_distribute_doc_data(&stream), None);
    }
}
#[cfg(test)]
mod asset_tests {
    use crate::assets::parse_bin_data;

    /// Encodes `s` as a little-endian `u16` code-unit count followed by the
    /// UTF-16 code units themselves, each little-endian.
    fn encode_wstr(s: &str) -> Vec<u8> {
        let units: Vec<u16> = s.encode_utf16().collect();
        let count = units.len() as u16;
        std::iter::once(count)
            .chain(units)
            .flat_map(u16::to_le_bytes)
            .collect()
    }

    #[test]
    fn parses_embedding_bin_data_record() {
        // Leading u16 of value 1, then two length-prefixed strings.
        let payload: Vec<u8> = [
            1u16.to_le_bytes().to_vec(),
            encode_wstr("BIN0001.png"),
            encode_wstr("png"),
        ]
        .concat();

        let entry = parse_bin_data(&payload, 1).unwrap();

        assert_eq!(entry.id, 1);
        assert_eq!(entry.kind, "EMBEDDING");
        assert_eq!(entry.format, "png");
        assert!(entry.size_bytes.is_none());
    }
}